#QC filter, delete nextera adapter and low quality
bash src/03.trimgalore.sh
#Remove adapters
bash src/05.cutadaptASVs were inferred from each sequencing run separately with DADA2 in R
The code for each run is found in:
2022 src/Dada2_2022.html 2023
src/Dada2_2023.html
Import outputs from DADA2 to QIIME2
#Create output results dir
mkdir -p results/02.QIIME2
#Import run 2022 files
#import fasta seqs
qiime tools import --input-path results/01.Dada2/2022/ASVs_2022.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2022.qza
# append missing header to the table for import
cat <(echo -n "#OTU Table") results/01.Dada2/2022/ASV_to_seqs-nochim_2022.tsv > results/02.QIIME2/temp.txt
# convert to biom
biom convert -i results/02.QIIME2/temp.txt -o results/02.QIIME2/temp.biom --table-type="OTU table" --to-hdf5
#create feature table
qiime tools import --input-path results/02.QIIME2/temp.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2022.qza
#Get visual summarizes
qiime feature-table tabulate-seqs \
--i-data results/02.QIIME2/ASV_rep_seq_2022.qza \
--o-visualization results/02.QIIME2/ASV_rep_seq_2022.qzv
qiime feature-table summarize \
--i-table results/02.QIIME2/ASV_table_2022.qza \
--o-visualization results/02.QIIME2/ASV_table_2022.qzv
#Import run 2023 files
#import fasta seqs
qiime tools import --input-path results/01.Dada2/2023/ASVs_2023.fasta --type 'FeatureData[Sequence]' --output-path results/02.QIIME2/ASV_rep_seq_2023.qza
# append missing header to the table for import
cat <(echo -n "#OTU Table") results/01.Dada2/2023/ASV_to_seqs-nochim_2023.tsv > results/02.QIIME2/temp2.txt
# convert to biom
biom convert -i results/02.QIIME2/temp2.txt -o results/02.QIIME2/temp2.biom --table-type="OTU table" --to-hdf5
#create feature table
qiime tools import --input-path results/02.QIIME2/temp2.biom --type 'FeatureTable[Frequency]' --input-format BIOMV210Format --output-path results/02.QIIME2/ASV_table_2023.qza
#Get visual summarizes
qiime feature-table tabulate-seqs \
--i-data results/02.QIIME2/ASV_rep_seq_2023.qza \
--o-visualization results/02.QIIME2/ASV_rep_seq_2023.qzv
qiime feature-table summarize \
--i-table results/02.QIIME2/ASV_table_2023.qza \
--o-visualization results/02.QIIME2/ASV_table_2023.qzvHere I removed all ASVs with a frequency of less than 0.1% of the mean sample depth. This cut-off excludes ASVs that are likely due to MiSeq bleed-through between runs (reported by Illumina to be 0.1% of reads). To calculate this cut-off I identified the mean sample depth, multiplied it by 0.001, and rounded to the nearest integer. This step are describe in this paper
# 2022
#Filter feature table by frequence
qiime feature-table filter-features --i-table results/02.QIIME2/ASV_table_2022.qza --p-min-samples 1 --p-min-frequency 52 --o-filtered-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza
#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qzv
#Filter rep-seqs 2022
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2022.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza
# 2023
#Filter feature table by frequence
qiime feature-table filter-features --i-table results/02.QIIME2/ASV_table_2023.qza --p-min-samples 1 --p-min-frequency 97 --o-filtered-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza
#Get visual feature table summary
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --o-visualization results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qzv
#Filter rep-seqs 2023
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --i-data results/02.QIIME2/ASV_rep_seq_2023.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza#Merge rep seqs
qiime feature-table merge-seqs --i-data results/02.QIIME2/ASV_rep_seq_2022_filter-freq52.qza results/02.QIIME2/ASV_rep_seq_2023_filter-freq97.qza --o-merged-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza
#Merge feature tables
qiime feature-table merge --i-tables results/02.QIIME2/ASV_table_2022_freq52_1minsamp.qza results/02.QIIME2/ASV_table_2023_freq97_1minsamp.qza --p-overlap-method sum --o-merged-table results/02.QIIME2/merged-table-filters-freq52-97.qza
#Get visual summarizes
qiime feature-table tabulate-seqs --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-visualization results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qzv
qiime feature-table summarize --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --o-visualization results/02.QIIME2/merged-table-filters-freq52-97.qzv#Tax classification
qiime feature-classifier classify-sklearn --i-classifier data/dbs/classifier_silva_138_trained.qza --i-reads results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-classification results/02.QIIME2/taxonomy_merge.qza --p-n-jobs 80
#get visualization
qiime metadata tabulate --m-input-file results/02.QIIME2/taxonomy_merge.qza --o-visualization results/02.QIIME2/taxonomy_merge.qzvqiime taxa filter-table --i-table results/02.QIIME2/merged-table-filters-freq52-97.qza --i-taxonomy results/02.QIIME2/taxonomy_merge.qza --p-exclude Archaea,Eukaryota,Mitochondria,Chloroplast --p-include p__ --o-filtered-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza
qiime feature-table summarize --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --o-visualization results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qzv| Type | ASVs | Frequency | Samples |
|---|---|---|---|
| 2022 | 29302 | 2677260 | 53 |
| 2023 | 19143 | 3035299 | 30 |
| 2022-filter-freq | 6316 | 2410664 | 53 |
| 2023-filter-freq | 2935 | 2754795 | 30 |
| Merge-fltr-freq | 6316 | 5165459 | 83 |
| Mrg-fltr-freq-aemc | 6261 | 5084303 | 83 |
Remove in fasta seqs
#remove in fasta sequences
qiime feature-table filter-seqs --i-table results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --i-data results/02.QIIME2/merged-rep_seqs_filters_freq52-97.qza --o-filtered-data results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qzanohup qiime phylogeny align-to-tree-mafft-iqtree \
--p-n-threads auto --i-sequences results/02.QIIME2/ASV_rep_seq_filters_freq_merge_aemc.qza \
--o-alignment results/02.QIIME2/align-mft-iqt.qza \
--o-masked-alignment results/02.QIIME2/masked-align-iqtree.qza \
--o-tree results/02.QIIME2/unrooted-tree-iqtree.qza \
--o-rooted-tree results/02.QIIME2/rooted-tree-iqtree.qza --verbose > outs/07.phylogeny-iqtree.nohup &
[1] 2166700#taxonomy
qiime tools export --input-path results/02.QIIME2/taxonomy_merge.qza --output-path results/02.QIIME2/exports/taxonomy
#feature table
qiime tools export --input-path results/02.QIIME2/ASV_table_filter_freq_merge_aemc.qza --output-path results/02.QIIME2/exports/ASV_table
#reformat taxonomy tsv
sed -i -e '1 s/Feature/#Feature/' -e '1 s/Taxon/taxonomy/' results/02.QIIME2/exports/taxonomy/taxonomy.tsv
#Add taxonomy to feature table
biom add-metadata -i results/02.QIIME2/exports/ASV_table/feature-table.biom -o results/02.QIIME2/exports/feature-table_tax.biom --observation-metadata-fp results/02.QIIME2/exports/taxonomy/taxonomy.tsv --sc-separated results/02.QIIME2/exports/taxonomy/taxonomy.tsv
#Convert to tsv from biom format
biom convert -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax.tsv --to-tsv --header-key taxonomy
#Get effective reads per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom -o results/02.QIIME2/exports/feature-table_tax_reads_per_sample_summary.txt
#Get ASVs per sample
biom summarize-table -i results/02.QIIME2/exports/feature-table_tax.biom --qualitative -o results/02.QIIME2/exports/feature-table_tax_ASVs_per_sample_summary.txtload("src/Diversity.RData")It is recommended to choose specimens that have at least two compartments. This will enable comparison between them.
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Filter
metadata_fltr <- metadata %>%
group_by(Specimen) %>%
filter(n() >= 2) %>%
ungroup()
#save table
write.table(metadata_fltr, "results/03.Diversity/Metadata_fltr.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)Compare
dim(metadata)## [1] 83 13
dim(metadata_fltr)## [1] 72 13
library(ggplot2)
library(ggsci)
library(cowplot)
#all
# factor to reorder plot
metadata$Location <- factor(metadata$Location)
metadata$Source <- factor(metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata$Specimen <- factor(metadata$Specimen)
# plot
samples_plot_all <- ggplot(metadata, aes(x = Specimen, y = ..count.., fill = Source)) +
geom_bar(position = "dodge") +
facet_wrap(~ Location, scales = "free_x") +
labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels
scale_fill_jco()
# fltr
# factor to reorder plot
metadata_fltr$Location <- factor(metadata_fltr$Location)
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
metadata_fltr$Specimen <- factor(metadata_fltr$Specimen)
#plot
samples_plot_fltr <- ggplot(metadata_fltr, aes(x = Specimen, y = ..count.., fill = Source)) +
geom_bar(position = "dodge") +
facet_wrap(~ Location, scales = "free_x") +
labs(title = "Samples by Location and Compartment", y = "Number of Samples", x = "Specimen") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate labels
scale_fill_jco()
samples_distribution_plot <- plot_grid(samples_plot_all, samples_plot_fltr, labels = c("A", "B"), ncol = 1, rel_heights = c(1, 1))## Warning: The dot-dot notation (`..count..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(count)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
#save plot
ggsave("results/plots/02.diversity/01.samples_distribution.png", samples_distribution_plot , width = 8, height = 7)
# show
samples_distribution_plot# Filter otu table
# Create vector with identifiers of samples to keep
samples_to_keep <- metadata_fltr$`sample-id`
# Filter OTU table
otu_table_fltr <- otu_table[, samples_to_keep, drop = FALSE]
# check
dim(otu_table)## [1] 6261 83
dim(otu_table_fltr)## [1] 6261 72
library(phyloseq)
library(qiime2R)
#create phyloseq object
ps <- qza_to_phyloseq(
features = "results/02.QIIME2/merged-table-filters-freq52-97.qza",
tree = "results/02.QIIME2/rooted-tree-iqtree.qza",
taxonomy = "results/02.QIIME2/taxonomy_merge.qza",
metadata = "data/metadata.tsv")library(phyloseq)
library(dplyr)
# Filter samples according previous filters
# Extract metadata
meta_data <- as.data.frame(sample_data(ps))
# Filter
filtered_meta_data <- meta_data %>%
group_by(Specimen) %>%
mutate(n_compartments = n_distinct(Source)) %>%
filter(n_compartments >= 2) %>%
ungroup()
# Empty check point
if(nrow(filtered_meta_data) == 0) {
stop("No samples meet the criteria")
}
# get samples
sample_ids_to_keep <- filtered_meta_data$sample
# Sample of interest check point
if(length(sample_ids_to_keep) == 0) {
stop("No valid sample IDs were found. Check your metadata processing steps.")
}
# Prune samples from the phyloseq object
ps2 <- prune_samples(sample_ids_to_keep, ps)
# show
print(ps)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6261 taxa and 83 samples ]
## sample_data() Sample Data: [ 83 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]
print(ps2)## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6261 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6261 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6261 tips and 6260 internal nodes ]
library(devtools)## Loading required package: usethis
library(microbiome)##
## microbiome R package (microbiome.github.com)
##
##
##
## Copyright (C) 2011-2022 Leo Lahti,
## Sudarshan Shetty et al. <microbiome.github.io>
##
## Attaching package: 'microbiome'
## The following object is masked from 'package:ggplot2':
##
## alpha
## The following object is masked from 'package:base':
##
## transform
library(microbiomeutilities)## Warning: replacing previous import 'ggplot2::alpha' by 'microbiome::alpha' when
## loading 'microbiomeutilities'
##
## Attaching package: 'microbiomeutilities'
## The following object is masked from 'package:microbiome':
##
## add_refseq
#check data
microbiome::summarize_phyloseq(ps2)## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 1738693] Total number of reads = 47393804] Average number of reads = 65824.72222222225] Median number of reads = 59703.57] Sparsity = 0.7991002502262686] Any OTU sum to 1 or less? YES8] Number of singletons = 1009] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
##
## [[2]]
## [1] "2] Max. number of reads = 173869"
##
## [[3]]
## [1] "3] Total number of reads = 4739380"
##
## [[4]]
## [1] "4] Average number of reads = 65824.7222222222"
##
## [[5]]
## [1] "5] Median number of reads = 59703.5"
##
## [[6]]
## [1] "7] Sparsity = 0.799100250226268"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 100"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Remove singletons
#Delete singletons
ps3 <- filter_taxa(ps2, function(x) sum(x) > 1, TRUE)Remove Phylum with low prevalence
# Explore prevalence
## Get prevalence
prevdf = apply(X = otu_table(ps3),
MARGIN = ifelse(taxa_are_rows(ps3), yes = 1, no = 2),
FUN = function(x){sum(x > 0)})
## Add taxonomy
prevdf = data.frame(Prevalence = prevdf,
TotalAbundance = taxa_sums(ps3),
tax_table(ps3))
## Check prevalence at Phylum level
plyr::ddply(prevdf, "Phylum", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})plyr::ddply(prevdf, "Genus", function(df1){cbind(mean(df1$Prevalence),sum(df1$Prevalence))})# tax to filter
filtertaxa = c("Elusimicrobiota", "GAL15")
# filter
ps4 = subset_taxa(ps3, !Phylum %in% filtertaxa)Clean names
#check names
#head(microbiome::abundances(ps_fltr))
head(phyloseq::tax_table(ps4))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_1034 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "d__Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## Family Genus Species
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" "uncultured_bacterium"
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA
#Next, cleaning the "d__" and similar values.
# extending gsub to entire table
tax_table(ps4)[, colnames(tax_table(ps4))] <- gsub(tax_table(ps4)[, colnames(tax_table(ps4))], pattern = "[a-z]__", replacement = "")
#change ambiguities
tax_table(ps4)[tax_table(ps4) == "uncultured_bacterium"] <- NA
tax_table(ps4)[tax_table(ps4) == "uncultured"] <- NA
tax_table(ps4)[tax_table(ps4) == "metagenome"] <- NA #check
head(phyloseq::tax_table(ps4))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_1034 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_1428 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4955 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_2133 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_4953 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_6141 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## Family Genus Species
## ASV_1034 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_1428 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4955 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_2133 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_4953 "WD2101_soil_group" "WD2101_soil_group" "uncultured_planctomycete"
## ASV_6141 "WD2101_soil_group" "WD2101_soil_group" NA
Agglomerate levels
#Aggregate at genus level.
ps_family <- phyloseq::tax_glom(ps4, "Family", NArm = TRUE)
ps_genus <- phyloseq::tax_glom(ps4, "Genus", NArm = TRUE)
ps_species <- phyloseq::tax_glom(ps4, "Species", NArm = TRUE)ps_genus <- filter_taxa(ps_genus, function(x) sum(x) > 1, TRUE)
head(phyloseq::tax_table(ps_genus))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class Order
## ASV_519 "Bacteria" "Planctomycetota" "Phycisphaerae" "Tepidisphaerales"
## ASV_5638 "Bacteria" "Planctomycetota" "Phycisphaerae" "Phycisphaerales"
## ASV_4626 "Bacteria" "Planctomycetota" "Phycisphaerae" "mle1-8"
## ASV_4159 "Bacteria" "Planctomycetota" "OM190" "OM190"
## ASV_498 "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"
## ASV_765 "Bacteria" "Planctomycetota" "Planctomycetes" "Pirellulales"
## Family Genus Species
## ASV_519 "WD2101_soil_group" "WD2101_soil_group" NA
## ASV_5638 "Phycisphaeraceae" "AKYG587" NA
## ASV_4626 "mle1-8" "mle1-8" NA
## ASV_4159 "OM190" "OM190" NA
## ASV_498 "Pirellulaceae" "Pir4_lineage" NA
## ASV_765 "Pirellulaceae" "Pirellula" NA
Change id by tax name
#Substitute these IDs with names of genus
#ps_genus <- format_to_besthit(ps_genus) #no me gusta el formato, pero sirve
taxa_names(ps_genus) <- tax_table(ps_genus)[,"Genus"]
head(taxa_names(ps_genus))## [1] "WD2101_soil_group" "AKYG587" "mle1-8"
## [4] "OM190" "Pir4_lineage" "Pirellula"
head(phyloseq::tax_table(ps_genus))## Taxonomy Table: [6 taxa by 7 taxonomic ranks]:
## Kingdom Phylum Class
## WD2101_soil_group "Bacteria" "Planctomycetota" "Phycisphaerae"
## AKYG587 "Bacteria" "Planctomycetota" "Phycisphaerae"
## mle1-8 "Bacteria" "Planctomycetota" "Phycisphaerae"
## OM190 "Bacteria" "Planctomycetota" "OM190"
## Pir4_lineage "Bacteria" "Planctomycetota" "Planctomycetes"
## Pirellula "Bacteria" "Planctomycetota" "Planctomycetes"
## Order Family Genus
## WD2101_soil_group "Tepidisphaerales" "WD2101_soil_group" "WD2101_soil_group"
## AKYG587 "Phycisphaerales" "Phycisphaeraceae" "AKYG587"
## mle1-8 "mle1-8" "mle1-8" "mle1-8"
## OM190 "OM190" "OM190" "OM190"
## Pir4_lineage "Pirellulales" "Pirellulaceae" "Pir4_lineage"
## Pirellula "Pirellulales" "Pirellulaceae" "Pirellula"
## Species
## WD2101_soil_group NA
## AKYG587 NA
## mle1-8 NA
## OM190 NA
## Pir4_lineage NA
## Pirellula NA
Clean rare characters
# Remove characters
taxa_names(ps_genus) <- gsub(" ", ".", taxa_names(ps_genus))
taxa_names(ps_genus) <- gsub("\\(|\\)", "", taxa_names(ps_genus))
#check
unique(tax_table(ps_genus)[,"Genus"] )## Taxonomy Table: [418 taxa by 1 taxonomic ranks]:
## Genus
## WD2101_soil_group "WD2101_soil_group"
## AKYG587 "AKYG587"
## mle1-8 "mle1-8"
## OM190 "OM190"
## Pir4_lineage "Pir4_lineage"
## Pirellula "Pirellula"
## SH-PL14 "SH-PL14"
## Planctomicrobium "Planctomicrobium"
## Schlesneria "Schlesneria"
## Gemmata "Gemmata"
## Fimbriiglobus "Fimbriiglobus"
## Zavarzinella "Zavarzinella"
## Gemmataceae "Gemmataceae"
## Aquisphaera "Aquisphaera"
## Singulisphaera "Singulisphaera"
## Tundrisphaera "Tundrisphaera"
## Latescibacterota "Latescibacterota"
## Latescibacteraceae "Latescibacteraceae"
## Terrimicrobium "Terrimicrobium"
## Candidatus_Udaeobacter "Candidatus_Udaeobacter"
## Candidatus_Xiphinematobacter "Candidatus_Xiphinematobacter"
## Chthoniobacter "Chthoniobacter"
## Pedosphaeraceae "Pedosphaeraceae"
## ADurb.Bin063-1 "ADurb.Bin063-1"
## DEV008 "DEV008"
## Ellin517 "Ellin517"
## Pedosphaera "Pedosphaera"
## Ellin516 "Ellin516"
## Candidatus_Kaiserbacteria "Candidatus_Kaiserbacteria"
## Opitutus "Opitutus"
## Lacunisphaera "Lacunisphaera"
## Cerasicoccus "Cerasicoccus"
## Subgroup_17 "Subgroup_17"
## Subgroup_25 "Subgroup_25"
## Subgroup_9 "Subgroup_9"
## Vicinamibacteraceae "Vicinamibacteraceae"
## Luteitalea "Luteitalea"
## Vicinamibacter "Vicinamibacter"
## Edaphobacter "Edaphobacter"
## Terriglobus "Terriglobus"
## Granulicella "Granulicella"
## Acidipila "Acidipila"
## Acidicapsa "Acidicapsa"
## Occallatibacter "Occallatibacter"
## Terracidiphilus "Terracidiphilus"
## Candidatus_Koribacter "Candidatus_Koribacter"
## Subgroup_13 "Subgroup_13"
## Subgroup_5 "Subgroup_5"
## Subgroup_11 "Subgroup_11"
## Neochlamydia "Neochlamydia"
## Candidatus_Protochlamydia "Candidatus_Protochlamydia"
## cvE6 "cvE6"
## Pla4_lineage "Pla4_lineage"
## Ilumatobacter "Ilumatobacter"
## CL500-29_marine_group "CL500-29_marine_group"
## Iamia "Iamia"
## IMCC26256 "IMCC26256"
## 0319-7L14 "0319-7L14"
## Actinomadura "Actinomadura"
## Actinoallomurus "Actinoallomurus"
## Acidothermus "Acidothermus"
## Actinocorallia "Actinocorallia"
## Acrocarpospora "Acrocarpospora"
## Planomonospora "Planomonospora"
## Sphaerisporangium "Sphaerisporangium"
## Streptosporangiaceae "Streptosporangiaceae"
## Nonomuraea "Nonomuraea"
## Streptosporangium "Streptosporangium"
## Streptomyces "Streptomyces"
## Streptacidiphilus "Streptacidiphilus"
## Kitasatospora "Kitasatospora"
## Jiangella "Jiangella"
## Aeromicrobium "Aeromicrobium"
## Marmoricola "Marmoricola"
## Nocardioides "Nocardioides"
## Microlunatus "Microlunatus"
## Cutibacterium "Cutibacterium"
## Actinopolymorpha "Actinopolymorpha"
## Flindersiella "Flindersiella"
## Kribbella "Kribbella"
## Tenggerimyces "Tenggerimyces"
## Actinospica "Actinospica"
## Catenulispora "Catenulispora"
## Lechevalieria "Lechevalieria"
## Saccharothrix "Saccharothrix"
## Amycolatopsis "Amycolatopsis"
## Actinophytocola "Actinophytocola"
## Kutzneria "Kutzneria"
## Actinomycetospora "Actinomycetospora"
## Pseudonocardia "Pseudonocardia"
## Kibdelosporangium "Kibdelosporangium"
## Hamadaea "Hamadaea"
## Rugosimonospora "Rugosimonospora"
## Catellatospora "Catellatospora"
## Actinoplanes "Actinoplanes"
## Rhizocola "Rhizocola"
## Allocatelliglobosispora "Allocatelliglobosispora"
## Longispora "Longispora"
## Virgisporangium "Virgisporangium"
## Dactylosporangium "Dactylosporangium"
## Krasilnikovia "Krasilnikovia"
## Luedemannella "Luedemannella"
## Micromonospora "Micromonospora"
## Asanoa "Asanoa"
## Mycobacterium "Mycobacterium"
## Corynebacterium "Corynebacterium"
## Nocardia "Nocardia"
## Rhodococcus "Rhodococcus"
## Jatrophihabitans "Jatrophihabitans"
## Cryptosporangium "Cryptosporangium"
## Frankia "Frankia"
## Crossiella "Crossiella"
## Saccharopolyspora "Saccharopolyspora"
## Frankiales "Frankiales"
## Geodermatophilus "Geodermatophilus"
## Blastococcus "Blastococcus"
## Modestobacter "Modestobacter"
## Nakamurella "Nakamurella"
## Intrasporangium "Intrasporangium"
## Pseudarthrobacter "Pseudarthrobacter"
## Paenarthrobacter "Paenarthrobacter"
## Agromyces "Agromyces"
## Leifsonia "Leifsonia"
## Microbacterium "Microbacterium"
## Curtobacterium "Curtobacterium"
## Cellulomonas "Cellulomonas"
## Isoptericola "Isoptericola"
## Sporichthya "Sporichthya"
## 67-14 "67-14"
## Conexibacter "Conexibacter"
## Solirubrobacter "Solirubrobacter"
## Parviterribacter "Parviterribacter"
## Solirubrobacteraceae "Solirubrobacteraceae"
## Gaiella "Gaiella"
## MB-A2-108 "MB-A2-108"
## Rubrobacter "Rubrobacter"
## Gitt-GS-136 "Gitt-GS-136"
## KD4-96 "KD4-96"
## Thermosporothrix "Thermosporothrix"
## 1959-1 "1959-1"
## Ktedonobacter "Ktedonobacter"
## Ktedonobacterales "Ktedonobacterales"
## JG30-KF-AS9 "JG30-KF-AS9"
## JG30-KF-CM45 "JG30-KF-CM45"
## AKYG1722 "AKYG1722"
## FFCH7168 "FFCH7168"
## AKIW781 "AKIW781"
## C0119 "C0119"
## S085 "S085"
## OLB14 "OLB14"
## JG30-KF-CM66 "JG30-KF-CM66"
## SAR202_clade "SAR202_clade"
## TK10 "TK10"
## Litorilinea "Litorilinea"
## A4b "A4b"
## OLB13 "OLB13"
## UTCFX1 "UTCFX1"
## RBG-13-54-9 "RBG-13-54-9"
## SBR1031 "SBR1031"
## Domibacillus "Domibacillus"
## Brevibacillus "Brevibacillus"
## Geobacillus "Geobacillus"
## Lysinibacillus "Lysinibacillus"
## Kurthia "Kurthia"
## Paenisporosarcina "Paenisporosarcina"
## Staphylococcus "Staphylococcus"
## Tepidibacillus "Tepidibacillus"
## Fictibacillus "Fictibacillus"
## Bacillus "Bacillus"
## Terribacillus "Terribacillus"
## Lactococcus "Lactococcus"
## Ammoniphilus "Ammoniphilus"
## Paenibacillus "Paenibacillus"
## Cohnella "Cohnella"
## Baia "Baia"
## Tumebacillus "Tumebacillus"
## AD3 "AD3"
## Sporomusa "Sporomusa"
## Anaerospora "Anaerospora"
## Obscuribacteraceae "Obscuribacteraceae"
## Oxynema_BDU_92071 "Oxynema_BDU_92071"
## WPS-2 "WPS-2"
## Bryobacter "Bryobacter"
## Candidatus_Solibacter "Candidatus_Solibacter"
## Paraclostridium "Paraclostridium"
## Romboutsia "Romboutsia"
## Clostridium_sensu_stricto_8 "Clostridium_sensu_stricto_8"
## Clostridium_sensu_stricto_12 "Clostridium_sensu_stricto_12"
## Clostridium_sensu_stricto_9 "Clostridium_sensu_stricto_9"
## Clostridium_sensu_stricto_3 "Clostridium_sensu_stricto_3"
## Clostridium_sensu_stricto_13 "Clostridium_sensu_stricto_13"
## Herbinix "Herbinix"
## Anaerocolumna "Anaerocolumna"
## Saccharimonadales "Saccharimonadales"
## TM7a "TM7a"
## LWQ8 "LWQ8"
## TM7 "TM7"
## Fimbriimonadaceae "Fimbriimonadaceae"
## Armatimonadales "Armatimonadales"
## Kapabacteriales "Kapabacteriales"
## Ruminiclostridium "Ruminiclostridium"
## Anaerobacterium "Anaerobacterium"
## RB41 "RB41"
## JGI_0001001-H03 "JGI_0001001-H03"
## Blastocatella "Blastocatella"
## Aridibacter "Aridibacter"
## Stenotrophobacter "Stenotrophobacter"
## 11-24 "11-24"
## DS-100 "DS-100"
## Subgroup_2 "Subgroup_2"
## Rokubacteriales "Rokubacteriales"
## WX65 "WX65"
## Subgroup_7 "Subgroup_7"
## Vermiphilaceae "Vermiphilaceae"
## Subgroup_10 "Subgroup_10"
## Subgroup_22 "Subgroup_22"
## Subgroup_18 "Subgroup_18"
## Entotheonellaceae "Entotheonellaceae"
## Candidatus_Entotheonella "Candidatus_Entotheonella"
## Gemmatimonas "Gemmatimonas"
## Roseisolibacter "Roseisolibacter"
## S0134_terrestrial_group "S0134_terrestrial_group"
## BD2-11_terrestrial_group "BD2-11_terrestrial_group"
## YC-ZSS-LKJ147 "YC-ZSS-LKJ147"
## Longimicrobiaceae "Longimicrobiaceae"
## Minicystis "Minicystis"
## Polyangium "Polyangium"
## Eel-36e1D6 "Eel-36e1D6"
## BIrii41 "BIrii41"
## Phaselicystis "Phaselicystis"
## mle1-27 "mle1-27"
## Pajaroellobacter "Pajaroellobacter"
## Haliangium "Haliangium"
## Pseudenhygromyxa "Pseudenhygromyxa"
## Nannocystis "Nannocystis"
## Myxococcus "Myxococcus"
## Archangium "Archangium"
## P3OB-42 "P3OB-42"
## Anaeromyxobacter "Anaeromyxobacter"
## Blfdi19 "Blfdi19"
## NB1-j "NB1-j"
## OM27_clade "OM27_clade"
## Geobacter "Geobacter"
## Bdellovibrio "Bdellovibrio"
## MBNT15 "MBNT15"
## bacteriap25 "bacteriap25"
## RCP2-54 "RCP2-54"
## Holophaga "Holophaga"
## Babeliales "Babeliales"
## Nitrospira "Nitrospira"
## 4-29-1 "4-29-1"
## PMMR1 "PMMR1"
## Asticcacaulis "Asticcacaulis"
## Caulobacter "Caulobacter"
## Phenylobacterium "Phenylobacterium"
## A0839 "A0839"
## Nordella "Nordella"
## Rhodomicrobium "Rhodomicrobium"
## Pedomicrobium "Pedomicrobium"
## Hyphomicrobium "Hyphomicrobium"
## Labrys "Labrys"
## Phreatobacter "Phreatobacter"
## Amphiplicatus "Amphiplicatus"
## Bosea "Bosea"
## Neo-b11 "Neo-b11"
## Methylobacterium-Methylorubrum "Methylobacterium-Methylorubrum"
## Microvirga "Microvirga"
## FFCH5858 "FFCH5858"
## Xanthobacteraceae "Xanthobacteraceae"
## Bauldia "Bauldia"
## alphaI_cluster "alphaI_cluster"
## Methylovirgula "Methylovirgula"
## 28-YEA-48 "28-YEA-48"
## D05-2 "D05-2"
## Rubellimicrobium "Rubellimicrobium"
## Amaricoccus "Amaricoccus"
## Kaistia "Kaistia"
## Hirschia "Hirschia"
## SWB02 "SWB02"
## Roseiarcus "Roseiarcus"
## Amb-16S-1323 "Amb-16S-1323"
## Pseudorhodoplanes "Pseudorhodoplanes"
## Pseudolabrys "Pseudolabrys"
## Bradyrhizobium "Bradyrhizobium"
## Rhodoplanes "Rhodoplanes"
## Starkeya "Starkeya"
## Devosia "Devosia"
## Methyloligellaceae "Methyloligellaceae"
## Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium "Allorhizobium-Neorhizobium-Pararhizobium-Rhizobium"
## Mesorhizobium "Mesorhizobium"
## Ensifer "Ensifer"
## Ochrobactrum "Ochrobactrum"
## Phyllobacterium "Phyllobacterium"
## Aureimonas "Aureimonas"
## KF-JG30-B3 "KF-JG30-B3"
## Telmatospirillum "Telmatospirillum"
## Sphingomonas "Sphingomonas"
## Ellin6055 "Ellin6055"
## Novosphingobium "Novosphingobium"
## Altererythrobacter "Altererythrobacter"
## Qipengyuania "Qipengyuania"
## Sphingobium "Sphingobium"
## Haematospirillum "Haematospirillum"
## Aliidongia "Aliidongia"
## Reyranella "Reyranella"
## Candidatus_Paracaedibacter "Candidatus_Paracaedibacter"
## Dongia "Dongia"
## Acidisoma "Acidisoma"
## Craurococcus-Caldovatus "Craurococcus-Caldovatus"
## Rhodovastum "Rhodovastum"
## Rhodopila "Rhodopila"
## Acidocella "Acidocella"
## Inquilinus "Inquilinus"
## Azospirillum "Azospirillum"
## Skermanella "Skermanella"
## Constrictibacter "Constrictibacter"
## Candidatus_Alysiosphaera "Candidatus_Alysiosphaera"
## URHD0088 "URHD0088"
## Steroidobacter "Steroidobacter"
## R7C24 "R7C24"
## JG36-TzT-191 "JG36-TzT-191"
## Acidibacter "Acidibacter"
## WD260 "WD260"
## KF-JG30-C25 "KF-JG30-C25"
## PLTA13 "PLTA13"
## CCD24 "CCD24"
## Legionella "Legionella"
## Candidatus_Ovatusbacter "Candidatus_Ovatusbacter"
## JTB255_marine_benthic_group "JTB255_marine_benthic_group"
## Pantoea "Pantoea"
## Escherichia-Shigella "Escherichia-Shigella"
## Serratia "Serratia"
## Enterobacter "Enterobacter"
## Klebsiella "Klebsiella"
## KI89A_clade "KI89A_clade"
## Gammaproteobacteria "Gammaproteobacteria"
## Pseudomonas "Pseudomonas"
## OM60NOR5_clade "OM60(NOR5)_clade"
## Enhydrobacter "Enhydrobacter"
## Acinetobacter "Acinetobacter"
## Alkanindiges "Alkanindiges"
## Cavicella "Cavicella"
## Aquicella "Aquicella"
## Dinghuibacter "Dinghuibacter"
## Chitinophaga "Chitinophaga"
## Flavihumibacter "Flavihumibacter"
## Parafilimonas "Parafilimonas"
## Filimonas "Filimonas"
## Terrimonas "Terrimonas"
## Ferruginibacter "Ferruginibacter"
## UTBCD1 "UTBCD1"
## Puia "Puia"
## Flavitalea "Flavitalea"
## Niastella "Niastella"
## Flavisolibacter "Flavisolibacter"
## Edaphobaculum "Edaphobaculum"
## Taibaiella "Taibaiella"
## Sphingobacterium "Sphingobacterium"
## Olivibacter "Olivibacter"
## Mucilaginibacter "Mucilaginibacter"
## Solitalea "Solitalea"
## env.OPS_17 "env.OPS_17"
## AKYH767 "AKYH767"
## NS11-12_marine_group "NS11-12_marine_group"
## CWT_CU03-E12 "CWT_CU03-E12"
## Bacteroidetes_VC2.1_Bac22 "Bacteroidetes_VC2.1_Bac22"
## Chryseobacterium "Chryseobacterium"
## Cloacibacterium "Cloacibacterium"
## Flavobacterium "Flavobacterium"
## Candidatus_Brownia "Candidatus_Brownia"
## Rhodocytophaga "Rhodocytophaga"
## Dyadobacter "Dyadobacter"
## Chryseolinea "Chryseolinea"
## Ohtaekwangia "Ohtaekwangia"
## OLB12 "OLB12"
## Adhaeribacter "Adhaeribacter"
## Nevskia "Nevskia"
## Pseudoxanthomonas "Pseudoxanthomonas"
## Stenotrophomonas "Stenotrophomonas"
## Xanthomonas "Xanthomonas"
## Fulvimonas "Fulvimonas"
## Rudaea "Rudaea"
## Dyella "Dyella"
## Luteibacter "Luteibacter"
## Rhodanobacter "Rhodanobacter"
## Arenimonas "Arenimonas"
## Lysobacter "Lysobacter"
## Massilia "Massilia"
## Silvimonas "Silvimonas"
## Piscinibacter "Piscinibacter"
## Rhizobacter "Rhizobacter"
## Variovorax "Variovorax"
## Caenimonas "Caenimonas"
## Curvibacter "Curvibacter"
## Ramlibacter "Ramlibacter"
## Azohydromonas "Azohydromonas"
## Paucibacter "Paucibacter"
## Pelomonas "Pelomonas"
## Roseateles "Roseateles"
## Comamonas "Comamonas"
## Pandoraea "Pandoraea"
## Bordetella "Bordetella"
## Achromobacter "Achromobacter"
## Noviherbaspirillum "Noviherbaspirillum"
## Cupriavidus "Cupriavidus"
## Herbaspirillum "Herbaspirillum"
## mle1-7 "mle1-7"
## TRA3-20 "TRA3-20"
## GOUTA6 "GOUTA6"
## MND1 "MND1"
## IS-44 "IS-44"
## A21b "A21b"
## SC-I-84 "SC-I-84"
## Nitrosospira "Nitrosospira"
## B1-7BS "B1-7BS"
## Niveibacterium "Niveibacterium"
## Ellin6067 "Ellin6067"
## Burkholderia-Caballeronia-Paraburkholderia "Burkholderia-Caballeronia-Paraburkholderia"
# check data
table(meta(ps4)$Location, meta(ps4)$Source)##
## Rhizhomorph Soil associated Stipe
## El Castillo/Zentla 8 9 9
## Monte obscuro Emiliano Zapata 2 7 7
## Naolinco 10 10 10
#subsets
ps_ElCastillo <- subset_samples(ps4, Location == "El Castillo/Zentla")
ps_MonteObscuro <- subset_samples(ps4, Location == "Monte obscuro Emiliano Zapata")
ps_Naolinco <- subset_samples(ps4, Location == "Naolinco")
ps_ElCastillo## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_MonteObscuro## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps_Naolinco## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 30 samples ]
## sample_data() Sample Data: [ 30 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
#check data
microbiome::summarize_phyloseq(ps_ElCastillo)## Compositional = NO2
## 1] Min. number of reads = 322442] Max. number of reads = 676833] Total number of reads = 13117814] Average number of reads = 50453.11538461545] Median number of reads = 516897] Sparsity = 0.82902444202986] Any OTU sum to 1 or less? YES8] Number of singletons = 28859] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 32244"
##
## [[2]]
## [1] "2] Max. number of reads = 67683"
##
## [[3]]
## [1] "3] Total number of reads = 1311781"
##
## [[4]]
## [1] "4] Average number of reads = 50453.1153846154"
##
## [[5]]
## [1] "5] Median number of reads = 51689"
##
## [[6]]
## [1] "7] Sparsity = 0.8290244420298"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 2885"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_MonteObscuro)## Compositional = NO2
## 1] Min. number of reads = 238902] Max. number of reads = 682953] Total number of reads = 7181394] Average number of reads = 44883.68755] Median number of reads = 414317] Sparsity = 0.7536531904529966] Any OTU sum to 1 or less? YES8] Number of singletons = 24069] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 23890"
##
## [[2]]
## [1] "2] Max. number of reads = 68295"
##
## [[3]]
## [1] "3] Total number of reads = 718139"
##
## [[4]]
## [1] "4] Average number of reads = 44883.6875"
##
## [[5]]
## [1] "5] Median number of reads = 41431"
##
## [[6]]
## [1] "7] Sparsity = 0.753653190452996"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 2406"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
microbiome::summarize_phyloseq(ps_Naolinco)## Compositional = NO2
## 1] Min. number of reads = 442602] Max. number of reads = 1738693] Total number of reads = 27092974] Average number of reads = 90309.95] Median number of reads = 874747] Sparsity = 0.7894625750933596] Any OTU sum to 1 or less? YES8] Number of singletons = 32619] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 44260"
##
## [[2]]
## [1] "2] Max. number of reads = 173869"
##
## [[3]]
## [1] "3] Total number of reads = 2709297"
##
## [[4]]
## [1] "4] Average number of reads = 90309.9"
##
## [[5]]
## [1] "5] Median number of reads = 87474"
##
## [[6]]
## [1] "7] Sparsity = 0.789462575093359"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? YES"
##
## [[8]]
## [1] "8] Number of singletons = 3261"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Remove singletons
#Delete singletons
ps_ElCastillo <- filter_taxa(ps_ElCastillo, function(x) sum(x) > 1, TRUE)
ps_MonteObscuro <- filter_taxa(ps_MonteObscuro, function(x) sum(x) > 1, TRUE)
ps_Naolinco <- filter_taxa(ps_Naolinco, function(x) sum(x) > 1, TRUE)
#check
ps_ElCastillo## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3274 taxa and 26 samples ]
## sample_data() Sample Data: [ 26 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 3274 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3274 tips and 3273 internal nodes ]
ps_MonteObscuro## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 3753 taxa and 16 samples ]
## sample_data() Sample Data: [ 16 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 3753 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 3753 tips and 3752 internal nodes ]
ps_Naolinco## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 2898 taxa and 30 samples ]
## sample_data() Sample Data: [ 30 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 2898 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 2898 tips and 2897 internal nodes ]
# Check
table(meta(ps_ElCastillo_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 8 9 9
table(meta(ps_MonteObscuro_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 2 7 7
table(meta(ps_Naolinco_rel_f)$Source)##
## Rhizhomorph Soil associated Stipe
## 10 10 10
library(microbiome)
library(microbiomeutilities)
#function
convert_to_compositional_and_index_reformat <- function(ps) {
ps_rel <- microbiome::transform(ps, "compositional")
ps_rel_f <- format_to_besthit(ps_rel)
return(ps_rel_f)
}
# phyloseq objects
phyloseq_objects <- list(
ps_ElCastillo = ps_ElCastillo,
ps_MonteObscuro = ps_MonteObscuro,
ps_Naolinco = ps_Naolinco,
ps4 = ps4
)
# Apply
phyloseq_objects_rel <- lapply(phyloseq_objects, convert_to_compositional_and_index_reformat)
# Subset
ps_ElCastillo_rel_f <- phyloseq_objects_rel$ps_ElCastillo
ps_MonteObscuro_rel_f <- phyloseq_objects_rel$ps_MonteObscuro
ps_Naolinco_rel_f <- phyloseq_objects_rel$ps_Naolinco
ps_full_rel_f <- phyloseq_objects_rel$ps4# Get full core with microbiome
full_ps_core_relative <- core(ps_full_rel_f, 0, 0.5)
full_ps_core_counts <- core(ps4, 0, 0.5)
# Collapse the rare taxa into an “Other” category
#full_ps_core_fltOthers <- aggregate_rare(ps4_rel, "Genus", detection = 0, prevalence = 0.5)
# Compare phyloseq objects
ps_full_rel_f## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
ps4## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 6159 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 6159 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 6159 tips and 6158 internal nodes ]
full_ps_core_relative## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 365 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 365 taxa by 8 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
full_ps_core_counts## phyloseq-class experiment-level object
## otu_table() OTU Table: [ 365 taxa and 72 samples ]
## sample_data() Sample Data: [ 72 samples by 12 sample variables ]
## tax_table() Taxonomy Table: [ 365 taxa by 7 taxonomic ranks ]
## phy_tree() Phylogenetic Tree: [ 365 tips and 364 internal nodes ]
microbiome::summarize_phyloseq(full_ps_core_counts)## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
##
## [[2]]
## [1] "2] Max. number of reads = 132023"
##
## [[3]]
## [1] "3] Total number of reads = 2296141"
##
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
##
## [[5]]
## [1] "5] Median number of reads = 26767.5"
##
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
Create full, core and location ampvis objects
library(tibble)
library(ampvis2)
# Ampvis object function
create_ampvis_object <- function(pseq) {
# create and extract otu table
otu_table_ampvis <- data.frame(OTU = rownames(phyloseq::otu_table(pseq)@.Data),
phyloseq::otu_table(pseq)@.Data,
phyloseq::tax_table(pseq)@.Data,
check.names = FALSE)
# Metadata
meta_data_ampvis <- data.frame(phyloseq::sample_data(pseq),
check.names = FALSE)
# change index by SampleID
meta_data_ampvis <- meta_data_ampvis %>% rownames_to_column(var = "SampleID")
# ampvis object
av2 <- amp_load(otu_table_ampvis, meta_data_ampvis)
return(av2)
}# phyloseq objects
phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)
# Apply function
ampvis_objects <- lapply(phyloseq_objects, create_ampvis_object)
ampvis_objects## $ps4
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 72 6159 4739217 23890 173869 59703.5
## Avg#Reads
## 65822.46
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 6159(100%) 6159(100%) 6128(99.5%) 6058(98.36%) 5483(89.02%) 4442(72.12%)
## Species
## 1182(19.19%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_ElCastillo
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 26 3274 1311781 32244 67683 51689
## Avg#Reads
## 50453.12
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 3274(100%) 3274(100%) 3256(99.45%) 3216(98.23%) 2907(88.79%) 2311(70.59%)
## Species
## 705(21.53%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_MonteObscuro
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 16 3753 718139 23890 68295 41431
## Avg#Reads
## 44883.69
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 3753(100%) 3753(100%) 3734(99.49%) 3692(98.37%) 3393(90.41%) 2815(75.01%)
## Species
## 615(16.39%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $ps_Naolinco
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 30 2898 2709297 44260 173869 87474
## Avg#Reads
## 90309.9
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 2898(100%) 2898(100%) 2887(99.62%) 2848(98.27%) 2570(88.68%) 2093(72.22%)
## Species
## 570(19.67%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
##
## $full_ps_core_counts
## ampvis2 object with 3 elements.
## Summary of OTU table:
## Samples OTUs Total#Reads Min#Reads Max#Reads Median#Reads
## 72 365 2296141 3289 132023 26767.5
## Avg#Reads
## 31890.85
##
## Assigned taxonomy:
## Kingdom Phylum Class Order Family Genus
## 365(100%) 365(100%) 363(99.45%) 358(98.08%) 331(90.68%) 262(71.78%)
## Species
## 80(21.92%)
##
## Metadata variables: 13
## SampleID, sample, Source, Date, Year, Location, Week, Raw.reads, Colect_number, Specimen, quant_reading, Effective_reads, ASVs
#subset amp obj
phyloseq_objects <- list(ps4 = ps4, ps_ElCastillo = ps_ElCastillo, ps_MonteObscuro = ps_MonteObscuro, ps_Naolinco = ps_Naolinco, full_ps_core_counts = full_ps_core_counts)
av2_full <- ampvis_objects$ps4
av2_core <- ampvis_objects$full_ps_core_counts
av2_ElCastillo <- ampvis_objects$ps_ElCastillo
av2_MonteObscuro <-ampvis_objects$ps_MonteObscuro
av2_Naolinco <- ampvis_objects$ps_Naolinco# Full
core_source_av2_full <- amp_venn(av2_full, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_full <- core_source_av2_full$Otutable
#save core compartments otu table
write.table(core_otu_table_av2_full, "results/03.Diversity/Core_compartments_otu_table_ampvis_full.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)# El Castillo
core_source_av2_ElCastillo <- amp_venn(av2_ElCastillo, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_ElCastillo <- core_source_av2_ElCastillo$Otutable
write.table(core_otu_table_av2_ElCastillo, "results/03.Diversity/Core_compartments_otu_table_ampvis_ElCastillo.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# Monte Obscuro
core_source_av2_MonteObscuro <- amp_venn(av2_MonteObscuro, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_MonteObscuro <- core_source_av2_MonteObscuro$Otutable
write.table(core_otu_table_av2_MonteObscuro, "results/03.Diversity/Core_compartments_otu_table_ampvis_MonteObscuro.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# Naolinco
core_source_av2_Naolinco <- amp_venn(av2_Naolinco, group_by = "Source", cut_a = 0, cut_f = 50, text_size = 6, detailed_output=TRUE)
core_otu_table_av2_Naolinco <- core_source_av2_Naolinco$Otutable
write.table(core_otu_table_av2_Naolinco, "results/03.Diversity/Core_compartments_otu_table_ampvis_Naolinco.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)Based here
Calculate core between Compartments
# count number of samples in each group
table(meta(ps4)$Source, useNA = "always")##
## Rhizhomorph Soil associated Stipe <NA>
## 20 26 26 0
#List of compartments
compartments <- unique(as.character(meta(ps_full_rel_f)$Source))
print(compartments)## [1] "Stipe" "Soil associated" "Rhizhomorph"
#Write a for loop to go through each of the compartments one by one and combine identified core taxa into a list.
list_core_full <- list()# an empty object to store information
for (compartment in compartments){
#print(paste0("Identifying Core Taxa for ", compartment))
ps_sub_full <- subset_samples(ps_full_rel_f, Source == compartment)
core_m_full <- core_members(ps_sub_full,
detection = 0, # 100 % samples
prevalence = 0.5) # 50% prevalence
print(paste0("No. of core taxa in ", compartment, " : ", length(core_m_full)))
# add to a list core taxa for each group.
list_core_full[[compartment]] <- core_m_full
#create core compartment variable
variable_name_full <- paste0("core_full_", gsub(" ", "_", compartment))
assign(variable_name_full, core_m_full, envir = .GlobalEnv)
#write all cores
file_name_full <- paste0("results/03.Diversity/",variable_name,".csv")
write.csv(core_m_full, file = file_name_full, row.names = FALSE)
}## [1] "No. of core taxa in Stipe : 135"
## [1] "No. of core taxa in Soil associated : 902"
## [1] "No. of core taxa in Rhizhomorph : 536"
Plot venn diagram
library(eulerr)
# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")
# Reorder
list_core_ordered <- list_core_full[desired_order]
# Circular venn
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")
plot_venn_venn <- plot(venn(list_core_ordered), fills = venn_colors, alpha = 0.9)
plot_venn_venn# Real size venn
euler_venn <- euler(list_core_ordered, shape = "ellipse")
plot_euler_venn <- plot(euler_venn, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_euler_vennGet core identities
Define colors and order
# factor to reorder plot
desired_order <- c("Soil associated", "Rhizhomorph", "Stipe")
venn_colors <- c(`Soil associated`="#0073C2FF", Rhizhomorph="#EFC000FF", Stipe="#868686FF")El Castillo
# El Castillo
compartmentsEC <- unique(as.character(meta(ps_ElCastillo_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaEC <- list()
for (compartment in compartmentsEC) {
ps_sub <- subset_samples(ps_ElCastillo_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaEC[[compartment]] <- core_m
cat("Compartimiento:", compartment, "\n")
cat("Número de core taxa:", length(core_m), "\n")
cat("Algunos core taxa:", head(core_m), "\n")
}## Compartimiento: Stipe
## Número de core taxa: 310
## Algunos core taxa: ASV_1894:f__Pirellulaceae ASV_1226:uncultured_Planctomyces ASV_1830:uncultured_Planctomyces ASV_763:uncultured_Planctomyces ASV_3514:uncultured_Planctomyces ASV_2226:uncultured_Planctomycetaceae
## Compartimiento: Rhizhomorph
## Número de core taxa: 582
## Algunos core taxa: ASV_3775:g__WD2101_soil_group ASV_2220:g__WD2101_soil_group ASV_1924:uncultured_Pasteuria ASV_765:uncultured_Pasteuria ASV_2059:f__Pirellulaceae ASV_1226:uncultured_Planctomyces
## Compartimiento: Soil associated
## Número de core taxa: 1239
## Algunos core taxa: ASV_5678:g__WD2101_soil_group ASV_3775:g__WD2101_soil_group ASV_3038:g__WD2101_soil_group ASV_5668:Planctomycetales_bacterium ASV_2220:g__WD2101_soil_group ASV_2821:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_EC <- list_core_taxaEC[desired_order]
# Circular venn
plot_venn_venn_EC <- plot(venn(list_core_ordered_EC), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_EC <- euler(list_core_ordered_EC, shape = "ellipse")
plot_euler_venn_EC <- plot(euler_venn_EC, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_ECplot_euler_venn_ECMonte Obscuro
# El Castillo
compartmentsMO <- unique(as.character(meta(ps_MonteObscuro_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaMO <- list()
for (compartment in compartmentsMO) {
ps_sub <- subset_samples(ps_MonteObscuro_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaMO[[compartment]] <- core_m
cat("Compartment:", compartment, "\n")
cat("Number of core taxa:", length(core_m), "\n")
cat("Some core taxa:", head(core_m), "\n")
}## Compartment: Stipe
## Number of core taxa: 374
## Some core taxa: ASV_1035:uncultured_Planctomycetaceae ASV_1218:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage ASV_4143:uncultured_soil ASV_2265:f__Pirellulaceae
## Compartment: Soil associated
## Number of core taxa: 2170
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_1428:g__WD2101_soil_group ASV_4955:g__WD2101_soil_group ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group
## Compartment: Rhizhomorph
## Number of core taxa: 1285
## Some core taxa: ASV_2133:g__WD2101_soil_group ASV_4953:uncultured_planctomycete ASV_1797:g__WD2101_soil_group ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group ASV_779:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_MO <- list_core_taxaMO[desired_order]
# Circular venn
plot_venn_venn_MO <- plot(venn(list_core_ordered_MO), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_MO <- euler(list_core_ordered_MO, shape = "ellipse")
plot_euler_venn_MO <- plot(euler_venn_MO, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_MOplot_euler_venn_MONaolinco
# El Castillo
compartmentsN <- unique(as.character(meta(ps_Naolinco_rel_f)$Source))
# Lista para guardar los core taxa de cada compartimento
list_core_taxaN <- list()
for (compartment in compartmentsN) {
ps_sub <- subset_samples(ps_Naolinco_rel_f, Source == compartment)
core_m <- core_members(ps_sub, detection = 0, prevalence = 0.5)
# Guardar los resultados en una lista
list_core_taxaN[[compartment]] <- core_m
cat("Compartment:", compartment, "\n")
cat("Number of core taxa:", length(core_m), "\n")
cat("Some core taxa:", head(core_m), "\n")
}## Compartment: Stipe
## Number of core taxa: 378
## Some core taxa: ASV_779:g__WD2101_soil_group ASV_800:uncultured_Planctomycetaceae ASV_765:uncultured_Pasteuria ASV_1894:f__Pirellulaceae ASV_1044:g__Gemmata ASV_939:uncultured_Prosthecobacter
## Compartment: Rhizhomorph
## Number of core taxa: 990
## Some core taxa: ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_779:g__WD2101_soil_group ASV_1560:g__Pir4_lineage ASV_800:uncultured_Planctomycetaceae ASV_498:g__Pir4_lineage
## Compartment: Soil associated
## Number of core taxa: 1735
## Some core taxa: ASV_1034:uncultured_planctomycete ASV_2133:g__WD2101_soil_group ASV_1797:g__WD2101_soil_group ASV_1823:uncultured_planctomycete ASV_1945:g__WD2101_soil_group ASV_2459:g__WD2101_soil_group
#Plot venn
library(eulerr)
# Reorder
list_core_ordered_N <- list_core_taxaN[desired_order]
# Circular venn
plot_venn_venn_N <- plot(venn(list_core_ordered_N), fills = venn_colors, alpha = 0.9)
# Real size venn
euler_venn_N <- euler(list_core_ordered_N, shape = "ellipse")
plot_euler_venn_N <- plot(euler_venn_N, quantities = TRUE, fills = list(fill = c("#0073C2FF", "#EFC000FF", "#868686FF"), alpha = 0.9), legend = list(labels = c("Soil associated", "Rhizhomorph", "Stipe")))
plot_venn_venn_Nplot_euler_venn_NCombine plots
library(cowplot)
#Combine venn plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_venn_venn_fmt <- plot_grid(plot_venn_venn, labels = c("A"), ncol = 1)
venn_loc_plots <- plot_grid(plot_venn_venn_EC, plot_venn_venn_MO, plot_venn_venn_N, labels = c("B", "C", "D"), ncol = 3)
venn_all_plots <- plot_grid(title_vennplot, plot_venn_venn_fmt, venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))
venn_all_plotslibrary(cowplot)
#Combine euler plot
title_vennplot <- ggdraw() + draw_label("Core Microbiome", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
plot_euler_venn_fmt <- plot_grid(plot_euler_venn, labels = c("A"), ncol = 1)
euler_venn_loc_plots <- plot_grid(plot_euler_venn_EC, plot_euler_venn_MO, plot_euler_venn_N, labels = c("B", "C", "D"), ncol = 3)
euler_venn_all_plots <- plot_grid(title_vennplot, plot_euler_venn_fmt, euler_venn_loc_plots, ncol = 1, rel_heights = c(0.2, 2, 1.7))
euler_venn_all_plotsAll
#Factor Source
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Full all samples
ampv_heatmap_abundances_genus_full <- amp_heatmap(av2_full,
group_by = "Compartment",
facet_by = "Location",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))Locations
#Factor Source
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# El castillo
ampv_heatmap_abundances_genus_EC <- amp_heatmap(av2_ElCastillo,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
#Factor Source
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Monte Obscuro
ampv_heatmap_abundances_genus_MO <- amp_heatmap(av2_MonteObscuro,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
#Factor Source
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Naolinco
ampv_heatmap_abundances_genus_N <- amp_heatmap(av2_Naolinco,
group_by = "Compartment",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))Combine plots
library(cowplot)
#Combine venn plot
title_abund_plot <- ggdraw() + draw_label("Relative Abundance", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
ampv_abund_all_g_fmt <- plot_grid(ampv_heatmap_abundances_genus_full, labels = c("A"), ncol = 1)## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_abund_loc_g <- plot_grid(ampv_heatmap_abundances_genus_EC, ampv_heatmap_abundances_genus_MO, ampv_heatmap_abundances_genus_N, labels = c("B", "C", "D"), ncol = 3)## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
ampv_all_g_plots <- plot_grid(title_abund_plot, ampv_abund_all_g_fmt, ampv_abund_loc_g, ncol = 1, rel_heights = c(0.2, 1, 1))
ampv_all_g_plots
### Core
#Factor Source
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# Full all samples
ampv_heatmap_abundances_genus_core <- amp_heatmap(av2_core,
group_by = "Compartment",
facet_by = "Location",
plot_values = TRUE,
tax_show = 20,
tax_aggregate = "Genus",
#tax_add = "Family",
plot_colorscale = "log10",
plot_legendbreaks = c(1, 5, 5),
color_vector = c("white","deepskyblue3","magenta3"))
ampv_heatmap_abundances_genus_core## Warning in scale_fill_gradientn(colours = color.pal, trans = plot_colorscale, :
## log-10 transformation introduced infinite values.
All
library(ampvis2)
library(ggplot2)
#Factor Source
av2_full$metadata$Compartment <- factor(av2_full$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_full <- amp_rankabundance(av2_full, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
#ggsave("results/plots/02.diversity/02.rank_abundance.png", rank_ab , width = 5, height = 4)Location
# El Castillo
#Factor Source
av2_ElCastillo$metadata$Compartment <- factor(av2_ElCastillo$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_EC <- amp_rankabundance(av2_ElCastillo, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
# Monte Obscuro
#Factor Source
av2_MonteObscuro$metadata$Compartment <- factor(av2_MonteObscuro$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_MO <- amp_rankabundance(av2_MonteObscuro, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
# Naolinco
#Factor Source
av2_Naolinco$metadata$Compartment <- factor(av2_Naolinco$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_N <- amp_rankabundance(av2_Naolinco, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()Combine
library(cowplot)
#Combine euler plot
title_rank_plot <- ggdraw() + draw_label("Relative diversity and species evenness", fontface = 'bold', x = 0.5, hjust = 0.5, size = 15)
rank_ab_full_EC <- plot_grid(rank_ab_full, rank_ab_EC, labels = c("A", "B"), ncol = 2)## Warning: Removed 1887 rows containing missing values or values outside the scale range
## (`geom_line()`).
rank_ab_MO_N_plots <- plot_grid(rank_ab_MO, rank_ab_N, labels = c("C", "D"), ncol = 2)
rankabund_all_plots <- plot_grid(title_rank_plot, rank_ab_full_EC, rank_ab_MO_N_plots, ncol = 1, rel_heights = c(0.16, 1, 1))
rankabund_all_plotslibrary(ampvis2)
library(ggplot2)
#Factor Source
av2_core$metadata$Compartment <- factor(av2_core$metadata$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# rank abundance
rank_ab_full_core <- amp_rankabundance(av2_core, log10_x = TRUE, group_by = "Compartment") +
scale_color_jco()
rank_ab_full_corelibrary(hilldiv)## Registered S3 methods overwritten by 'FSA':
## method from
## confint.boot car
## hist.boot car
library(tidyverse)## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ lubridate 1.9.3 ✔ stringr 1.5.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ microbiome::alpha() masks ggplot2::alpha()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ lubridate::stamp() masks cowplot::stamp()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(kableExtra)##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
# Get hill numbers
q0 <- hill_div(otu_table_fltr, qvalue = 0)
q1 <- hill_div(otu_table_fltr, qvalue = 1)
q2 <- hill_div(otu_table_fltr, qvalue = 2)
# Merge metadata with Hill numbers
q012_all <- cbind(q0, q1, q2) %>% as.data.frame() %>% rownames_to_column(var = "sample-id")
metadata_with_hill_fltr <- q012_all %>%
inner_join(metadata_fltr, by = c("sample-id"="sample-id"))
#save table
write.table(metadata_with_hill_fltr, "results/03.Diversity/Metadata_with_hill.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# show
metadata_with_hill_fltr %>% head() %>% kable()| sample-id | q0 | q1 | q2 | sample | Source | Date | Year | Location | Week | Raw reads | Colect_number | Specimen | quant_reading | Effective_reads | ASVs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DR1014-E | 135 | 9.036873 | 3.171788 | DR1014-E | Stipe | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 159896 | DRamos 1014 | Ind_1 | 948 | 68295 | 135 |
| DR1014-SAH | 2118 | 1145.961220 | 576.966109 | DR1014-SAH | Soil associated | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 223804 | DRamos 1014 | Ind_1 | 3071 | 40974 | 2118 |
| DR1018-E | 768 | 47.874948 | 5.350837 | DR1018-E | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 68578 | DRamos 1018 | Ind_3 | 97 | 23890 | 768 |
| DR1018-SAH | 1824 | 887.566616 | 436.200764 | DR1018-SAH | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 211120 | DRamos 1018 | Ind_3 | 4017 | 38161 | 1824 |
| DR1019-E | 1012 | 447.167799 | 145.072574 | DR1019-E | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 144270 | DRamos 1019 | Ind_4 | 215 | 33853 | 1012 |
| DR1019-SAH | 2202 | 1317.872289 | 723.411536 | DR1019-SAH | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 185380 | DRamos 1019 | Ind_4 | 3754 | 34576 | 2202 |
Get means
# Reorder q values
meta_qs <- metadata_with_hill_fltr %>%
pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0", "q1", "q2")) %>%
mutate(
qs = case_when(
q == "q0" ~ "q=0",
q == "q1" ~ "q=1",
q == "q2" ~ "q=2"
))
#Get means of Hill numbers
means <- meta_qs %>% group_by(Source, Location, qs) %>%
summarise(means = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
.groups = 'drop')
#save table
write.table(means, "results/03.Diversity/Hill_means_sd.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
print(means)## # A tibble: 27 × 5
## Source Location qs means sd
## <fct> <fct> <chr> <dbl> <dbl>
## 1 Soil associated El Castillo/Zentla q=0 1378. 201.
## 2 Soil associated El Castillo/Zentla q=1 531. 88.4
## 3 Soil associated El Castillo/Zentla q=2 183. 47.6
## 4 Soil associated Monte obscuro Emiliano Zapata q=0 2100. 154.
## 5 Soil associated Monte obscuro Emiliano Zapata q=1 1139. 142.
## 6 Soil associated Monte obscuro Emiliano Zapata q=2 591. 114.
## 7 Soil associated Naolinco q=0 1733. 131.
## 8 Soil associated Naolinco q=1 524. 165.
## 9 Soil associated Naolinco q=2 146. 101.
## 10 Rhizhomorph El Castillo/Zentla q=0 1103. 256.
## # ℹ 17 more rows
library(ggpubr) ##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
##
## get_legend
## The following object is masked from 'package:qiime2R':
##
## mean_sd
# factor to reorder plot
metadata_with_hill_fltr$Location <- factor(metadata_with_hill_fltr$Location)
metadata_with_hill_fltr$Compartment <- factor(metadata_with_hill_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
#plot
hill_barplot_explore_fltr <- metadata_with_hill_fltr %>%
pivot_longer(cols = q0:q2, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0", "q1", "q2")) %>%
mutate(
qs = case_when(
q == "q0" ~ "q=0",
q == "q1" ~ "q=1",
q == "q2" ~ "q=2"
)) %>%
ggbarplot(
x = "Compartment",
y = "value",
add = "mean_se",
facet.by = c("qs", "Location"),
fill = "Compartment"
) +
scale_fill_jco() +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
theme(
strip.background = element_blank(), #element_rect(fill = "")
strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
#panel.border = element_blank(),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "", #,
title = "Biodiversity across Locations and Compartments"
) + #+
#theme(plot.title = element_text(hjust = 0.5))
theme(legend.text = element_text(size = 12)) #+
#stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")
hill_barplot_explore_fltrggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)Alpha diversity depth correlation to order q=0
#install.packages("ggpubr")
library(ggpubr)
q0_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q0",
xlab= "Sequencing depth (number of reads)",
ylab = "q=0",
#ylab="Alpha diversity q=0 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")Alpha diversity depth correlation to order q=1
q1_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q1",
xlab= "Sequencing depth (number of reads)",
ylab = "q=1",
#ylab="Alpha diversity q=1 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")Alpha diversity depth correlation to order q=2
q2_vs_depth_fltr <- ggscatter(metadata_with_hill_fltr,
x = "Effective_reads",
y = "q2",
xlab= "Sequencing depth (number of reads)",
ylab = "q=2",
#ylab="Alpha diversity q=2 (effective number of total ASVs)",
add = "reg.line", # Add regression line
add.params = list(color = "#114e9d", fill = "lightgray"), # Customize reg. line
conf.int = TRUE, # Add confidence interval
cor.coef = TRUE, # Add correlation coefficient. see ?stat_cor
cor.coeff.args = list(method = "pearson", label.x = 3, label.sep = "\n")
)+
theme(legend.title = element_blank(), legend.position = "none")library(ggplot2)
library(cowplot)
#save plots
#Combine plot
title_corr_plot <- ggdraw() + draw_label("Alpha diversity depth correlation with fltr samples")
correlation_plot_q012_fltr <- plot_grid(title_corr_plot, q0_vs_depth_fltr,q1_vs_depth_fltr,q2_vs_depth_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
correlation_plot_q012_fltr#save plot
ggsave("results/plots/02.diversity/03.alpha_depth_correlation_fltr_samples.png", correlation_plot_q012_fltr, width = 8, height = 8)Shapiro test to Hill numbers
library(ggplot2)
library(cowplot)
#Shapiro test
shapiro_q0_fltr <- shapiro.test(metadata_with_hill_fltr$q0)
shapiro_q0_fltr_pvalue <- round(shapiro_q0_fltr$p.value, 5)
shapiro_q1_fltr <- shapiro.test(metadata_with_hill_fltr$q1)
shapiro_q1_fltr_pvalue <- round(shapiro_q1_fltr$p.value, 5)
shapiro_q2_fltr <- shapiro.test(metadata_with_hill_fltr$q2)
shapiro_q2_fltr_pvalue <- round(shapiro_q2_fltr$p.value, 9)
#Histograms
histplot_q0_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q0, xlab="q=0")) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q0_fltr_pvalue)) +
theme_bw() + xlab("q=0") + ylab("Frequency")
histplot_q1_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q1)) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q1_fltr_pvalue)) +
theme_bw() + xlab("q=1") + ylab("Frequency")
histplot_q2_fltr <- ggplot(metadata_with_hill_fltr, aes(x = q2)) +
geom_histogram(fill = "lightblue", bins = 15) +
ggtitle(paste("Shapiro, p-value:", shapiro_q2_fltr_pvalue)) +
theme_bw() + xlab("q=2") + ylab("Frequency")
#Combine plot
title_plot <- ggdraw() + draw_label("Histogram of Hill diversity") #, fontface = 'bold', x = 0.5, hjust = 0.5)
histplot_q012_fltr <- plot_grid(title_plot, histplot_q0_fltr, histplot_q1_fltr, histplot_q2_fltr, labels = c(" ","A", "B", "C"), ncol = 1, rel_heights = c(0.15, 1, 1, 1))
histplot_q012_fltr#save plot
ggsave("results/plots/02.diversity/04.hill_normality_test_histogram_fltr_samples.png", histplot_q012_fltr)## Saving 7 x 5 in image
A pesar de que q0 tiene una distribución normal, dado que los datos del proyecto tienen medidas repetidas y desbalanceo, lo adecuado es hacer comparaciones basadas en modelos. En este caso modelos lineales mixtos
Estos modelos son útiles cuando las observaciones tienen estructuras de dependencia, como datos longitudinales o datos agrupados/anidados. Los modelos lineales mixtos incluyen tanto efectos fijos como efectos aleatorios, donde los efectos fijos son comunes a todas las unidades/individuos, y los efectos aleatorios varían por grupo o unidad.
lme
library(nlme)
# lme
# q0
q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q0_lme_perm_fltr <- PermTest(q0_lme_fltr, B = 100)
# q1
q1_lme_fltr <- lme(q1 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q1_lme_perm_fltr <- PermTest(q1_lme_fltr, B = 100)
# q2
q2_lme_fltr <- lme(q2 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
q2_lme_perm_fltr <- PermTest(q2_lme_fltr, B = 100)glm
#q0 glm
q0_glm_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glm
q1_glm_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.036873
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1145.961220
## Warning in dpois(y, mu, log = TRUE): non-integer x = 47.874948
## Warning in dpois(y, mu, log = TRUE): non-integer x = 887.566616
## Warning in dpois(y, mu, log = TRUE): non-integer x = 447.167799
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1317.872289
## Warning in dpois(y, mu, log = TRUE): non-integer x = 375.937389
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1212.407272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 114.546532
## Warning in dpois(y, mu, log = TRUE): non-integer x = 689.583452
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1236.713620
## Warning in dpois(y, mu, log = TRUE): non-integer x = 27.775571
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1141.539572
## Warning in dpois(y, mu, log = TRUE): non-integer x = 198.051965
## Warning in dpois(y, mu, log = TRUE): non-integer x = 649.434099
## Warning in dpois(y, mu, log = TRUE): non-integer x = 1033.757810
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.120060
## Warning in dpois(y, mu, log = TRUE): non-integer x = 236.465478
## Warning in dpois(y, mu, log = TRUE): non-integer x = 470.600863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 40.056484
## Warning in dpois(y, mu, log = TRUE): non-integer x = 191.591875
## Warning in dpois(y, mu, log = TRUE): non-integer x = 515.230864
## Warning in dpois(y, mu, log = TRUE): non-integer x = 118.919348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 286.196272
## Warning in dpois(y, mu, log = TRUE): non-integer x = 521.533059
## Warning in dpois(y, mu, log = TRUE): non-integer x = 21.243804
## Warning in dpois(y, mu, log = TRUE): non-integer x = 483.242102
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.992440
## Warning in dpois(y, mu, log = TRUE): non-integer x = 76.214330
## Warning in dpois(y, mu, log = TRUE): non-integer x = 502.299672
## Warning in dpois(y, mu, log = TRUE): non-integer x = 22.362994
## Warning in dpois(y, mu, log = TRUE): non-integer x = 406.547029
## Warning in dpois(y, mu, log = TRUE): non-integer x = 610.775331
## Warning in dpois(y, mu, log = TRUE): non-integer x = 456.279074
## Warning in dpois(y, mu, log = TRUE): non-integer x = 631.344825
## Warning in dpois(y, mu, log = TRUE): non-integer x = 625.359839
## Warning in dpois(y, mu, log = TRUE): non-integer x = 29.302276
## Warning in dpois(y, mu, log = TRUE): non-integer x = 99.082917
## Warning in dpois(y, mu, log = TRUE): non-integer x = 384.027703
## Warning in dpois(y, mu, log = TRUE): non-integer x = 160.920694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 600.653719
## Warning in dpois(y, mu, log = TRUE): non-integer x = 667.285427
## Warning in dpois(y, mu, log = TRUE): non-integer x = 279.227229
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.358681
## Warning in dpois(y, mu, log = TRUE): non-integer x = 55.066246
## Warning in dpois(y, mu, log = TRUE): non-integer x = 141.925035
## Warning in dpois(y, mu, log = TRUE): non-integer x = 15.144746
## Warning in dpois(y, mu, log = TRUE): non-integer x = 110.564833
## Warning in dpois(y, mu, log = TRUE): non-integer x = 7.028120
## Warning in dpois(y, mu, log = TRUE): non-integer x = 12.557778
## Warning in dpois(y, mu, log = TRUE): non-integer x = 211.697465
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.425755
## Warning in dpois(y, mu, log = TRUE): non-integer x = 351.503682
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.579458
## Warning in dpois(y, mu, log = TRUE): non-integer x = 250.463366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 444.833675
## Warning in dpois(y, mu, log = TRUE): non-integer x = 124.745022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 94.655639
## Warning in dpois(y, mu, log = TRUE): non-integer x = 305.856635
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.462631
## Warning in dpois(y, mu, log = TRUE): non-integer x = 177.480033
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.411210
## Warning in dpois(y, mu, log = TRUE): non-integer x = 516.220144
## Warning in dpois(y, mu, log = TRUE): non-integer x = 288.582443
## Warning in dpois(y, mu, log = TRUE): non-integer x = 398.115993
## Warning in dpois(y, mu, log = TRUE): non-integer x = 668.236422
## Warning in dpois(y, mu, log = TRUE): non-integer x = 486.264725
## Warning in dpois(y, mu, log = TRUE): non-integer x = 321.527670
## Warning in dpois(y, mu, log = TRUE): non-integer x = 531.867537
## Warning in dpois(y, mu, log = TRUE): non-integer x = 747.886370
## Warning in dpois(y, mu, log = TRUE): non-integer x = 769.539696
## Warning in dpois(y, mu, log = TRUE): non-integer x = 510.480594
#q2 glm
q2_glm_fltr <- glm(q2 ~ log(Effective_reads)+Source*Location,
family = poisson(link = "log"),
data = metadata_with_hill_fltr)## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.171788
## Warning in dpois(y, mu, log = TRUE): non-integer x = 576.966109
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.350837
## Warning in dpois(y, mu, log = TRUE): non-integer x = 436.200764
## Warning in dpois(y, mu, log = TRUE): non-integer x = 145.072574
## Warning in dpois(y, mu, log = TRUE): non-integer x = 723.411536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 153.597535
## Warning in dpois(y, mu, log = TRUE): non-integer x = 666.541403
## Warning in dpois(y, mu, log = TRUE): non-integer x = 34.456157
## Warning in dpois(y, mu, log = TRUE): non-integer x = 178.728096
## Warning in dpois(y, mu, log = TRUE): non-integer x = 676.765401
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.375549
## Warning in dpois(y, mu, log = TRUE): non-integer x = 617.084909
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.482863
## Warning in dpois(y, mu, log = TRUE): non-integer x = 234.062638
## Warning in dpois(y, mu, log = TRUE): non-integer x = 441.625630
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.057934
## Warning in dpois(y, mu, log = TRUE): non-integer x = 62.632737
## Warning in dpois(y, mu, log = TRUE): non-integer x = 216.098156
## Warning in dpois(y, mu, log = TRUE): non-integer x = 14.800651
## Warning in dpois(y, mu, log = TRUE): non-integer x = 43.314536
## Warning in dpois(y, mu, log = TRUE): non-integer x = 139.622390
## Warning in dpois(y, mu, log = TRUE): non-integer x = 36.762232
## Warning in dpois(y, mu, log = TRUE): non-integer x = 79.269153
## Warning in dpois(y, mu, log = TRUE): non-integer x = 162.051399
## Warning in dpois(y, mu, log = TRUE): non-integer x = 9.485131
## Warning in dpois(y, mu, log = TRUE): non-integer x = 170.638980
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.551297
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.942091
## Warning in dpois(y, mu, log = TRUE): non-integer x = 127.678843
## Warning in dpois(y, mu, log = TRUE): non-integer x = 6.524882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 122.819172
## Warning in dpois(y, mu, log = TRUE): non-integer x = 209.071791
## Warning in dpois(y, mu, log = TRUE): non-integer x = 103.936882
## Warning in dpois(y, mu, log = TRUE): non-integer x = 278.567311
## Warning in dpois(y, mu, log = TRUE): non-integer x = 168.692594
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.734694
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.701974
## Warning in dpois(y, mu, log = TRUE): non-integer x = 167.217679
## Warning in dpois(y, mu, log = TRUE): non-integer x = 19.685820
## Warning in dpois(y, mu, log = TRUE): non-integer x = 210.245304
## Warning in dpois(y, mu, log = TRUE): non-integer x = 284.919159
## Warning in dpois(y, mu, log = TRUE): non-integer x = 46.476316
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.680969
## Warning in dpois(y, mu, log = TRUE): non-integer x = 11.637827
## Warning in dpois(y, mu, log = TRUE): non-integer x = 30.473794
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.983116
## Warning in dpois(y, mu, log = TRUE): non-integer x = 33.873471
## Warning in dpois(y, mu, log = TRUE): non-integer x = 3.263018
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.779628
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.277613
## Warning in dpois(y, mu, log = TRUE): non-integer x = 4.602524
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.393270
## Warning in dpois(y, mu, log = TRUE): non-integer x = 5.950724
## Warning in dpois(y, mu, log = TRUE): non-integer x = 52.948584
## Warning in dpois(y, mu, log = TRUE): non-integer x = 151.370352
## Warning in dpois(y, mu, log = TRUE): non-integer x = 17.340366
## Warning in dpois(y, mu, log = TRUE): non-integer x = 20.849351
## Warning in dpois(y, mu, log = TRUE): non-integer x = 48.667022
## Warning in dpois(y, mu, log = TRUE): non-integer x = 23.718140
## Warning in dpois(y, mu, log = TRUE): non-integer x = 18.618940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 16.535976
## Warning in dpois(y, mu, log = TRUE): non-integer x = 95.808905
## Warning in dpois(y, mu, log = TRUE): non-integer x = 54.011247
## Warning in dpois(y, mu, log = TRUE): non-integer x = 83.060795
## Warning in dpois(y, mu, log = TRUE): non-integer x = 238.121940
## Warning in dpois(y, mu, log = TRUE): non-integer x = 164.033797
## Warning in dpois(y, mu, log = TRUE): non-integer x = 53.685546
## Warning in dpois(y, mu, log = TRUE): non-integer x = 81.619451
## Warning in dpois(y, mu, log = TRUE): non-integer x = 255.500307
## Warning in dpois(y, mu, log = TRUE): non-integer x = 347.159348
## Warning in dpois(y, mu, log = TRUE): non-integer x = 90.468592
glm quasipoisson
#q0 glm quasipoisson
q0_glm_quasipoisson_fltr <- glm(q0 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glm quasipoisson
q1_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)
#q2 glm quasipoisson
q2_glm_quasipoisson_fltr <- glm(q1 ~ log(Effective_reads)+Source*Location,
family = quasipoisson(link = "log"),
data = metadata_with_hill_fltr)glmer
#q0 glmer
q0_glmer_fltr <- glmer(q0 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q1 glmer
q1_glmer_fltr <- glmer(q1 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)
#q2 glmer
q2_glmer_fltr <- glmer(q2 ~ log(Effective_reads)+Source*Location+(1|Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr)glmmTMB
# glmmTMB
# q0
q0_glmmTMB_fltr <- glmmTMB(q0 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)
# q1
q1_glmmTMB_fltr <- glmmTMB(q1 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model
# q2
q2_glmmTMB_fltr <- glmmTMB(q2 ~ log(Effective_reads)+Source*Location + (1 | Specimen),
family = poisson(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a poisson model
glmmTMB negative binomial
# glmmTMB
# q0
q0_glmmTMB_negbinml_fltr <- glmmTMB(q0 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)
# q1
q1_glmmTMB_negbinml_fltr <- glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q1 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model
# q2
q2_glmmTMB_negbinml_fltr <- glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | Specimen),
family = nbinom2(link = "log"),
data = metadata_with_hill_fltr,
dispformula = ~ 1)## Warning in glmmTMB(q2 ~ log(Effective_reads) + Source * Location + (1 | :
## non-integer counts in a nbinom2 model
calculate_AIC_quasiPoisson <- function(model) {
residuals <- residuals(model, type = "pearson")
deviance <- sum(residuals^2)
n <- length(residuals)
logLikelihood <- -0.5 * (n * log(2 * pi * deviance / n) + deviance)
k <- length(coef(model))
AIC_value <- 2 * k - 2 * logLikelihood
return(AIC_value)
}q012_gmodels_comparison_fltr <- data.frame(
Model = c("LME q0", "GLM Poisson q0", "GLM Quasi-Poisson q0", "GLMER q0", "GLMMTMB q0", "GLMMTMB_bn q0",
"LME q1", "GLM Poisson q1", "GLM Quasi-Poisson q1", "GLMER q1", "GLMMTMB q1","GLMMTMB_bn q1",
"LME q2", "GLM Poisson q2", "GLM Quasi-Poisson q2", "GLMER q2", "GLMMTMB q2","GLMMTMB_bn q2"),
AIC = c(AIC(q0_lme_fltr), AIC(q0_glm_fltr), calculate_AIC_quasiPoisson(q0_glm_quasipoisson_fltr), AIC(q0_glmer_fltr), AIC(q0_glmmTMB_fltr), AIC(q0_glmmTMB_negbinml_fltr),
AIC(q1_lme_fltr), AIC(q1_glm_fltr), calculate_AIC_quasiPoisson(q1_glm_quasipoisson_fltr), AIC(q1_glmer_fltr), AIC(q1_glmmTMB_fltr), AIC(q1_glmmTMB_negbinml_fltr),
AIC(q2_lme_fltr), AIC(q2_glm_fltr), calculate_AIC_quasiPoisson(q2_glm_quasipoisson_fltr), AIC(q2_glmer_fltr), AIC(q2_glmmTMB_fltr), AIC(q2_glmmTMB_negbinml_fltr)),
BIC = c(BIC(q0_lme_fltr), BIC(q0_glm_fltr), BIC(q0_glm_quasipoisson_fltr), BIC(q0_glmer_fltr), BIC(q0_glmmTMB_fltr), BIC(q0_glmmTMB_negbinml_fltr),
BIC(q1_lme_fltr), BIC(q1_glm_fltr), BIC(q1_glm_quasipoisson_fltr), BIC(q1_glmer_fltr), BIC(q1_glmmTMB_fltr), BIC(q1_glmmTMB_negbinml_fltr),
BIC(q2_lme_fltr), BIC(q2_glm_fltr), BIC(q2_glm_quasipoisson_fltr), BIC(q2_glmer_fltr), BIC(q2_glmmTMB_fltr), BIC(q2_glmmTMB_negbinml_fltr))
)
# show
q012_gmodels_comparison_fltrThe best AIC/BIC values are with lme, so, I will check lme model assumptions
Effect
# extract p-values fun
extract_pvalues <- function(perm_test_result) {
perm_test_result$resultats$p.value
}
# extract
lme_pvalues_q0_fltr <- extract_pvalues(q0_lme_perm_fltr)
lme_pvalues_q1_fltr <- extract_pvalues(q1_lme_perm_fltr)
lme_pvalues_q2_fltr <- extract_pvalues(q2_lme_perm_fltr)
# Dataframe p-values
library(dplyr)
lme_pvalues_table_fltr <- data.frame(
Hill = c("q0", "q1", "q2"),
`Intercept` = c(lme_pvalues_q0_fltr[1], lme_pvalues_q1_fltr[1], lme_pvalues_q2_fltr[1]),
`log(Depth)` = c(lme_pvalues_q0_fltr[2], lme_pvalues_q1_fltr[2], lme_pvalues_q2_fltr[2]),
`Source` = c(lme_pvalues_q0_fltr[3], lme_pvalues_q1_fltr[3], lme_pvalues_q2_fltr[3]),
`Location` = c(lme_pvalues_q0_fltr[4], lme_pvalues_q1_fltr[4], lme_pvalues_q2_fltr[4]),
`Source:Location` = c(lme_pvalues_q0_fltr[5], lme_pvalues_q1_fltr[5], lme_pvalues_q2_fltr[5])
)
# table
knitr::kable(t(lme_pvalues_table_fltr), caption = "P-values from Permuted LME")| Hill | q0 | q1 | q2 |
| Intercept | 0.02 | 0.00 | 0.01 |
| log.Depth. | 0.09 | 0.00 | 0.03 |
| Source | 0 | 0 | 0 |
| Location | 0 | 0 | 0 |
| Source.Location | 0.03 | 0.00 | 0.00 |
# save
write.csv(t(lme_pvalues_table_fltr), "results/03.Diversity/P-values_from_Permuted_LME_fltr.csv", row.names = FALSE)Ok, aquí parece que a excepción de log(Effective reads) Source, Location y la interacción tienen un efecto significativo en la diversidad q0, q1 y q2.
Evaluemos los supuestos:
Homocedasticidad
bptest, solo funciona para lm no para lme :(
#Homocedasticidad p-value >0.05 si cumple supuesto de homocedasticidad
library(lmtest)
# q0
Homoce_lme_q0_fltr <- bptest(q0_lme_fltr)
Homoce_lme_q0_fltr
# q1
Homoce_lme_q1_fltr <- bptest(q1_lme_fltr)
Homoce_lme_q1_fltr
# q2
Homoce_lme_q2_fltr <- bptest(q2_lme_fltr)
Homoce_lme_q2_fltrq0_lme_perm_fltr##
## Monte-Carlo test
##
## Call:
## PermTest.lme(obj = q0_lme_fltr, B = 100)
##
## Based on 100 replicates
## Simulated p-value:
## p.value
## (Intercept) 0.02
## log(Effective_reads) 0.09
## Source 0.00
## Location 0.00
## Source:Location 0.03
Check with easystats
q=0
#easystats::install_suggested()
library(easystats)## # Attaching packages: easystats 0.7.1 (red = needs update)
## ✔ bayestestR 0.13.2 ✔ correlation 0.8.4
## ✔ datawizard 0.10.0 ✔ effectsize 0.8.7
## ✔ insight 0.19.10 ✔ modelbased 0.8.7
## ✔ performance 0.11.0 ✔ parameters 0.21.6
## ✔ report 0.5.8 ✖ see 0.8.3
##
## Restart the R-Session and update packages with `easystats::easystats_update()`.
#Remember model # q0_lme_fltr <- lme(q0 ~ log(Effective_reads)+Source*Location, random=~1 |Specimen, data = metadata_with_hill_fltr)
check_q0_lme_1 <- check_model(q0_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q0_lme_1)check_q0_lme_2 <- check_model(update(q0_lme_fltr, .~. -Source:Location))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q0_lme_2)
Al eliminar la interacción mejoró la colinealidad y la normalidad y
linearidad se mantuvieron adecuadas. Me quedo entonces con este modelo
ajustado
q=1
check_q1_lme_1 <- check_model(q1_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
check_q1_lme_1check_q1_lme_2 <- check_model(update(q1_lme_fltr, .~. -Source:Location))## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
print(check_q1_lme_2)Pues realmente, no mejoró mucho la linealidad, asi que seguiré con el modelo que después de lme dió mejores valores de AIC/BIC.
q=2
check_q2_lme_1 <- check_model(q2_lme_fltr)## Converting missing values (`NA`) into regular values currently not
## possible for variables of class `NULL`.
check_q2_lme_1
Pff este si estuvo muy cónico, next!
Checar que onda con esto:
#q0
alpha<- metadata_with_hill %>% unite("interact", c("Source", "Location"), remove = F)
q0_lme<-lme(q0~ interact, random=~1 |Specimen, data = alpha)
q0_lme_means<-emmeans(q0_lme, pairwise ~ interact)
multcomp::cld(object = q0_lme_means$emmeans,
Letters = letters)
library(hilldiv)
library(tidyverse)
library(kableExtra)
# Get hill numbers
q0_c <- hill_div(otu_table_full_core, qvalue = 0)
q1_c <- hill_div(otu_table_full_core, qvalue = 1)
q2_c <- hill_div(otu_table_full_core, qvalue = 2)
# Merge metadata with Hill numbers
q012_c <- cbind(q0_c, q1_c, q2_c) %>% as.data.frame() %>% rownames_to_column(var = "sample")
metadata_with_hill_core <- q012_c %>%
inner_join(meta_full_core, by = c("sample"="sample"))
#save table
write.table(metadata_with_hill_core, "results/03.Diversity/Metadata_with_hill_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
# show
metadata_with_hill_core %>% head() %>% kable()| sample | q0_c | q1_c | q2_c | Source | Date | Year | Location | Week | Raw.reads | Colect_number | Specimen | quant_reading | Effective_reads | ASVs |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| DR1014-E | 33 | 5.937934 | 3.364797 | Stipe | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 159896 | DRamos 1014 | Ind_1 | 948 | 68295 | 135 |
| DR1014-SAH | 144 | 89.472107 | 62.663860 | Soil associated | sep-30 | 2021 | Monte obscuro Emiliano Zapata | 1 | 223804 | DRamos 1014 | Ind_1 | 3071 | 40974 | 2118 |
| DR1018-E | 122 | 6.037923 | 2.004335 | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 68578 | DRamos 1018 | Ind_3 | 97 | 23890 | 768 |
| DR1018-SAH | 153 | 91.533182 | 64.969780 | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 211120 | DRamos 1018 | Ind_3 | 4017 | 38161 | 1824 |
| DR1019-E | 122 | 61.454388 | 33.429425 | Stipe | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 144270 | DRamos 1019 | Ind_4 | 215 | 33853 | 1012 |
| DR1019-SAH | 153 | 96.430497 | 66.569922 | Soil associated | oct-07 | 2021 | Monte obscuro Emiliano Zapata | 2 | 185380 | DRamos 1019 | Ind_4 | 3754 | 34576 | 2202 |
Get means
# Reorder q values
meta_qs_c <- metadata_with_hill_core %>%
pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
mutate(
qs = case_when(
q == "q0_c" ~ "q=0",
q == "q1_c" ~ "q=1",
q == "q2_c" ~ "q=2"
))
#Get means of Hill numbers
means_c <- meta_qs_c %>% group_by(Source, Location, qs) %>%
summarise(means = mean(value, na.rm = TRUE),
sd = sd(value, na.rm = TRUE),
.groups = 'drop')
#save table
write.table(means_c, "results/03.Diversity/Hill_means_sd_core.tsv", quote = FALSE, sep = "\t", row.names = TRUE, col.names=TRUE)
print(means_c)## # A tibble: 27 × 5
## Source Location qs means sd
## <fct> <chr> <chr> <dbl> <dbl>
## 1 Soil associated El Castillo/Zentla q=0 234. 22.1
## 2 Soil associated El Castillo/Zentla q=1 96.2 8.67
## 3 Soil associated El Castillo/Zentla q=2 44.4 9.64
## 4 Soil associated Monte obscuro Emiliano Zapata q=0 156. 9.41
## 5 Soil associated Monte obscuro Emiliano Zapata q=1 96.2 6.49
## 6 Soil associated Monte obscuro Emiliano Zapata q=2 66.7 8.14
## 7 Soil associated Naolinco q=0 327. 19.2
## 8 Soil associated Naolinco q=1 101. 22.0
## 9 Soil associated Naolinco q=2 38.0 16.4
## 10 Rhizhomorph El Castillo/Zentla q=0 223. 21.0
## # ℹ 17 more rows
library(ggpubr)
# factor to reorder plot
metadata_with_hill_core$Location <- factor(metadata_with_hill_core$Location)
metadata_with_hill_core$Compartment <- factor(metadata_with_hill_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
#plot
hill_barplot_explore_core <- metadata_with_hill_core %>%
pivot_longer(cols = q0_c:q2_c, names_to = "q", values_to = "value") %>%
filter(q %in% c("q0_c", "q1_c", "q2_c")) %>%
mutate(
qs = case_when(
q == "q0_c" ~ "q=0",
q == "q1_c" ~ "q=1",
q == "q2_c" ~ "q=2"
)) %>%
ggbarplot(
x = "Compartment",
y = "value",
add = "mean_se",
facet.by = c("qs", "Location"),
fill = "Compartment"
) +
scale_fill_jco() +
geom_jitter(size = 0.8, position = position_jitter(width = 0.1)) +
facet_grid(rows = vars(qs), cols = vars(Location), scales = "free_y") +
theme(
strip.background = element_blank(), #element_rect(fill = "")
strip.text.x = element_text(face= "bold", size = 13, margin = margin(0.5, 0, 0.5, 0)),
strip.text.y = element_text(face= "bold", size = 14, angle = 0, margin = margin(0, 0.5, 0, 0.5)),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.line = element_line(colour = "black"),
#panel.border = element_blank(),
panel.spacing.x = unit(0.5, "lines"),
panel.background = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_text(colour = "black", size = 11),
legend.position = "bottom"
) +
labs(
fill = "Compartment",
y = "",
x = "", #,
title = "Biodiversity across Locations and Compartments of Core Microbiota"
) + #+
#theme(plot.title = element_text(hjust = 0.5))
theme(legend.text = element_text(size = 12)) #+
#stat_summary(fun = mean, geom = "line", aes(group = Compartment), color = "red")
hill_barplot_explore_core#ggsave("results/plots/02.diversity/02.Hill_diversity_across_Locations_and_Compartments_fltr.png", hill_barplot_explore_fltr, width = 11, height = 11)Prepare data
Explore
aldex_clr_data <- t(getMonteCarloSample(aldex_clr,1))
pca <- prcomp(aldex_clr_data)
meta=metadata_with_hill_fltr %>% dplyr::rename(SampleID="sample-id")library(ggplot2)
library(tidyverse)
library(ggsci)
pca_plot<- ggplot() +
geom_segment(data=data.frame(pca$rotation) %>% #arrows
rownames_to_column(var = "Feature.ID")%>%
mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
top_n(8, a) %>% #keep 10 furthest away points
mutate(PC1=PC1*50, PC2=PC2*50),
aes(x=0, xend=PC1, y=0, yend=PC2),
arrow = arrow(length = unit(0.3,"cm")))+
geom_point(data=data.frame(pca$x) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=PC1, y=PC2, fill=Source),shape=21, size=2) +
geom_vline(xintercept = 0, linetype = 2) + #lines-cross
geom_hline(yintercept = 0, linetype = 2) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical") +
geom_polygon(data=data.frame(pca$x) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID")%>%
drop_na() %>%
group_by(Location) %>%
slice(chull(PC1, PC2)),
aes(x=PC1, y=PC2, fill=Source, color=Source),
alpha = 0.3,
show.legend = FALSE)+
ylab("PC2")+xlab("PC1")+ theme_bw()
# ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>% #arrows
# rownames_to_column(var = "Feature.ID")%>%
#mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
#top_n(8, a) %>% #keep 10 furthest away points
#mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
# taxonomys)%>% dplyr::select(
#Taxon, PC1, PC2, Feature.ID)%>%
#mutate_at(
# c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
# tax= str_extract(Taxon, "[^__]+$")) %>%
#mutate_at(c("tax"), funs(tax = case_when(
# tax=="Fungi" ~ "Unidentified",
#tax=="sajor_caju" ~ "Lentinus",
#TRUE~as.character(tax)))),
#aes(x=PC1, y=PC2, label= tax),
#segment.colour = NA, col = 'black', fill= "#EEEEEE",
#fontface="italic", box.padding = 0.2, size=4)
pca_plotlibrary(ggplot2)
library(dplyr)
library(tidyr)
# Reorder
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
# plot
pca_plot <- ggplot(data = data.frame(pca$x) %>%
rownames_to_column(var = "SampleID") %>%
left_join(meta, by = "SampleID")) +
geom_segment(data = data.frame(pca$rotation) %>%
rownames_to_column(var = "Feature.ID") %>%
mutate(a = sqrt(PC1^2 + PC2^2)) %>%
top_n(8, a) %>%
mutate(PC1 = PC1*50, PC2 = PC2*50),
aes(x = 0, xend = PC1, y = 0, yend = PC2),
arrow = arrow(length = unit(0.01,"cm"))) +
geom_point(aes(x = PC1, y = PC2, color = Source, shape = Location), size = 3) +
geom_vline(xintercept = 0, linetype = 2) +
geom_hline(yintercept = 0, linetype = 2) +
scale_color_jco() +
scale_shape_manual(values = c(15, 16, 17)) + # Ajusta los valores al número de categorías de 'Location'
scale_fill_jco() +
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical") +
labs(y = "PC2", x = "PC1", color = "Source", shape = "Location") +
theme_bw()
# ggrepel::geom_label_repel(data=data.frame(pca$rotation) %>% #arrows
# rownames_to_column(var = "Feature.ID")%>%
# mutate(a=sqrt(PC1^2+PC2^2)) %>% # calculate the distance from the origin
# top_n(8, a) %>% #keep 10 furthest away points
# mutate(PC1=PC1*10, PC2=PC2*10)%>% left_join(
# taxonomys)%>% dplyr::select(
# Taxon, PC1, PC2, Feature.ID)%>%
# mutate_at(
# c("Taxon"), ~str_replace(.,";s__unidentified", "")) %>% mutate(
# tax= str_extract(Taxon, "[^__]+$")) %>%
# mutate_at(c("tax"), funs(tax = case_when(
# tax=="Fungi" ~ "Unidentified",
# tax=="sajor_caju" ~ "Lentinus",
# TRUE~as.character(tax)))),
# aes(x=PC1, y=PC2, label= tax),
# segment.colour = NA, col = 'black', fill= "#EEEEEE",
# fontface="italic", box.padding = 0.2, size=4)
pca_plotnmds_source_clr = vegan::metaMDS(aldex_clr_data, trymax = 20, k = 2)
var_stress_nmds_clr <- round(nmds_source_clr$stress, 5)
var_stress_nmds_clr
scores_source_clr= nmds_source_clr %>% vegan::scores()
meta$Source <- factor(meta$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_clr<- ggplot() +
geom_point(data=data.frame(scores_source_clr) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_clr <- nmds_plot_clr +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_clr
Me marca un error:
Error in eigen(-x/2, symmetric = TRUE) :
infinite or missing values in 'x'
# Bray NMDS bray
library(vegan)## Loading required package: permute
##
## Attaching package: 'permute'
## The following object is masked from 'package:devtools':
##
## check
## Loading required package: lattice
## This is vegan 2.6-4
##
## Attaching package: 'vegan'
## The following object is masked from 'package:microbiome':
##
## diversity
bray=vegdist(t(otu_table_fltr), method = "bray")
nmds_source_bray = vegan::metaMDS(bray, trymax = 20, k = 2) ## Run 0 stress 0.173438
## Run 1 stress 0.2049176
## Run 2 stress 0.2066556
## Run 3 stress 0.1748666
## Run 4 stress 0.1901403
## Run 5 stress 0.1769892
## Run 6 stress 0.1968377
## Run 7 stress 0.1741147
## Run 8 stress 0.1747265
## Run 9 stress 0.1756538
## Run 10 stress 0.1743105
## Run 11 stress 0.1993007
## Run 12 stress 0.1849932
## Run 13 stress 0.20207
## Run 14 stress 0.1734733
## ... Procrustes: rmse 0.008022249 max resid 0.06554523
## Run 15 stress 0.1744183
## Run 16 stress 0.1899323
## Run 17 stress 0.2033364
## Run 18 stress 0.1748666
## Run 19 stress 0.1769895
## Run 20 stress 0.1878257
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
var_stress_nmds_bray <- round(nmds_source_bray$stress, 5)
var_stress_nmds_bray## [1] 0.17344
scores_source_bray= nmds_source_bray %>% vegan::scores()
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_bray<- ggplot() +
geom_point(data=data.frame(scores_source_bray) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(metadata_fltr, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_bray <- nmds_plot_bray +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_bray),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_bray# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
aitch=vegdist(t(otu_table_fltr_pseudo), method = "aitchison")
nmds_source_aitch = vegan::metaMDS(aitch, trymax = 20, k = 2) ## Run 0 stress 0.1675395
## Run 1 stress 0.1760583
## Run 2 stress 0.1760584
## Run 3 stress 0.2104237
## Run 4 stress 0.1826827
## Run 5 stress 0.2180643
## Run 6 stress 0.1691633
## Run 7 stress 0.1681098
## Run 8 stress 0.1702249
## Run 9 stress 0.2232853
## Run 10 stress 0.2090027
## Run 11 stress 0.1750904
## Run 12 stress 0.1924574
## Run 13 stress 0.2729239
## Run 14 stress 0.2672364
## Run 15 stress 0.1712287
## Run 16 stress 0.1760503
## Run 17 stress 0.1682299
## Run 18 stress 0.1840784
## Run 19 stress 0.2234649
## Run 20 stress 0.1846116
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
var_stress_nmds_aitch <- round(nmds_source_aitch$stress, 5)
var_stress_nmds_aitch## [1] 0.16754
scores_source_aitch= nmds_source_aitch %>% vegan::scores()
metadata_fltr$Source <- factor(metadata_fltr$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_aitch<- ggplot() +
geom_point(data=data.frame(scores_source_aitch) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(metadata_fltr, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_aitch <- nmds_plot_aitch +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitchlibrary(vegan)
bray_dist =vegdist(t(otu_table_fltr), method = "bray")
pcoas=wcmdscale(bray_dist, eig = T)
pcoa_plot<- ggplot() +
geom_point(data=data.frame(pcoas$points) %>% #individuals
rownames_to_column(var = "SampleID")%>%
left_join(meta, by = "SampleID"),
aes(x=Dim1, y=Dim2,color = Source, shape = Location ), size=4) +
#geom_vline(xintercept = 0, linetype = 2) + #lines-cross
#geom_hline(yintercept = 0, linetype = 2) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw()+
#geom_polygon(data=data.frame(pca$x) %>% #individuals
#rownames_to_column(var = "SampleID")%>%
#left_join(metadata, by = "SampleID")%>%
#drop_na() %>%
#group_by(Type_of_soil) %>%
#slice(chull(PC1, PC2)),
#aes(x=PC1, y=PC2, fill=Type_of_soil, color=Type_of_soil),
#alpha = 0.3,
#show.legend = FALSE)+
ylab("PCoA2")+xlab("PCoA1")
pcoa_plotaitchison independientes por loction
library(vegan)
library(ggplot2)
library(dplyr)
library(tidyr)
library(cowplot) # Cargar la librería para combinar gráficos
# Agregar pseudocount y calcular la distancia de Aitchison para cada 'Location'
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
# Crear una lista para guardar los resultados de NMDS y los valores de estrés por 'Location'
nmds_list <- list()
stress_values <- c()
# Realizar NMDS para cada 'Location' y guardar los resultados en la lista
unique_locations <- unique(meta$Location)
for (loc in unique_locations) {
# Filtrar la tabla OTU por 'Location'
otu_subset <- otu_table_fltr_pseudo[, meta$Location == loc, drop=FALSE]
aitch <- vegdist(t(otu_subset), method = "aitchison")
# Ejecutar NMDS
nmds_result <- metaMDS(aitch, trymax = 20, k = 3)
# Guardar los resultados y valores de estrés en la lista
nmds_list[[loc]] <- nmds_result
stress_values[loc] <- nmds_result$stress
}## Run 0 stress 0.09286269
## Run 1 stress 0.09286263
## ... New best solution
## ... Procrustes: rmse 0.001127248 max resid 0.002186399
## ... Similar to previous best
## Run 2 stress 0.09017429
## ... New best solution
## ... Procrustes: rmse 0.043704 max resid 0.1242266
## Run 3 stress 0.09017437
## ... Procrustes: rmse 0.0000480378 max resid 0.0001201519
## ... Similar to previous best
## Run 4 stress 0.1015067
## Run 5 stress 0.09286238
## Run 6 stress 0.1020582
## Run 7 stress 0.1015077
## Run 8 stress 0.09017423
## ... New best solution
## ... Procrustes: rmse 0.00006679391 max resid 0.0001715491
## ... Similar to previous best
## Run 9 stress 0.09017468
## ... Procrustes: rmse 0.0002998592 max resid 0.0007742236
## ... Similar to previous best
## Run 10 stress 0.1019489
## Run 11 stress 0.09010485
## ... New best solution
## ... Procrustes: rmse 0.02879691 max resid 0.08604193
## Run 12 stress 0.1019491
## Run 13 stress 0.09010375
## ... New best solution
## ... Procrustes: rmse 0.0006040018 max resid 0.00175466
## ... Similar to previous best
## Run 14 stress 0.09286276
## Run 15 stress 0.09020433
## ... Procrustes: rmse 0.01953044 max resid 0.05798785
## Run 16 stress 0.1411114
## Run 17 stress 0.1019489
## Run 18 stress 0.09010307
## ... New best solution
## ... Procrustes: rmse 0.00209473 max resid 0.005696866
## ... Similar to previous best
## Run 19 stress 0.09017392
## ... Procrustes: rmse 0.03002276 max resid 0.08942171
## Run 20 stress 0.09286262
## *** Best solution repeated 1 times
## Run 0 stress 0.09353877
## Run 1 stress 0.0769705
## ... New best solution
## ... Procrustes: rmse 0.0815555 max resid 0.2052233
## Run 2 stress 0.07886688
## Run 3 stress 0.07697121
## ... Procrustes: rmse 0.0009006034 max resid 0.00377735
## ... Similar to previous best
## Run 4 stress 0.07697085
## ... Procrustes: rmse 0.0007500296 max resid 0.003145672
## ... Similar to previous best
## Run 5 stress 0.0760654
## ... New best solution
## ... Procrustes: rmse 0.04531549 max resid 0.1757149
## Run 6 stress 0.07593257
## ... New best solution
## ... Procrustes: rmse 0.02151344 max resid 0.09614764
## Run 7 stress 0.07606873
## ... Procrustes: rmse 0.01869065 max resid 0.07832724
## Run 8 stress 0.07606475
## ... Procrustes: rmse 0.02138182 max resid 0.09531475
## Run 9 stress 0.07606451
## ... Procrustes: rmse 0.02133414 max resid 0.09503959
## Run 10 stress 0.07593271
## ... Procrustes: rmse 0.0006539741 max resid 0.002707354
## ... Similar to previous best
## Run 11 stress 0.07593296
## ... Procrustes: rmse 0.0002252735 max resid 0.0009469119
## ... Similar to previous best
## Run 12 stress 0.08555555
## Run 13 stress 0.07886705
## Run 14 stress 0.07606783
## ... Procrustes: rmse 0.0192141 max resid 0.08196534
## Run 15 stress 0.07971444
## Run 16 stress 0.07697084
## Run 17 stress 0.07593299
## ... Procrustes: rmse 0.0007989614 max resid 0.003305441
## ... Similar to previous best
## Run 18 stress 0.07697085
## Run 19 stress 0.07886746
## Run 20 stress 0.07593299
## ... Procrustes: rmse 0.0001929159 max resid 0.0007967887
## ... Similar to previous best
## *** Best solution repeated 4 times
## Run 0 stress 0.09080181
## Run 1 stress 0.09080182
## ... Procrustes: rmse 0.00002879506 max resid 0.00008541925
## ... Similar to previous best
## Run 2 stress 0.09080181
## ... Procrustes: rmse 0.0000252384 max resid 0.00007395126
## ... Similar to previous best
## Run 3 stress 0.09080181
## ... Procrustes: rmse 0.00000736552 max resid 0.00002266562
## ... Similar to previous best
## Run 4 stress 0.09080184
## ... Procrustes: rmse 0.00006170891 max resid 0.0001818799
## ... Similar to previous best
## Run 5 stress 0.09080181
## ... Procrustes: rmse 0.00001008042 max resid 0.00002703597
## ... Similar to previous best
## Run 6 stress 0.09080181
## ... Procrustes: rmse 0.00002341484 max resid 0.0000681311
## ... Similar to previous best
## Run 7 stress 0.09080181
## ... Procrustes: rmse 0.000004666692 max resid 0.00001550806
## ... Similar to previous best
## Run 8 stress 0.09080181
## ... Procrustes: rmse 0.00002391561 max resid 0.00008072421
## ... Similar to previous best
## Run 9 stress 0.09080181
## ... Procrustes: rmse 0.000006583693 max resid 0.0000193473
## ... Similar to previous best
## Run 10 stress 0.09080181
## ... Procrustes: rmse 0.000001971398 max resid 0.000006157192
## ... Similar to previous best
## Run 11 stress 0.1092961
## Run 12 stress 0.09080181
## ... Procrustes: rmse 0.0000136451 max resid 0.00003854882
## ... Similar to previous best
## Run 13 stress 0.09080181
## ... Procrustes: rmse 0.000009283839 max resid 0.00003650044
## ... Similar to previous best
## Run 14 stress 0.09080181
## ... Procrustes: rmse 0.00002347726 max resid 0.00006888059
## ... Similar to previous best
## Run 15 stress 0.09080181
## ... Procrustes: rmse 0.00001835347 max resid 0.00006722972
## ... Similar to previous best
## Run 16 stress 0.09080181
## ... Procrustes: rmse 0.000001628275 max resid 0.000005246091
## ... Similar to previous best
## Run 17 stress 0.09080181
## ... Procrustes: rmse 0.000003733629 max resid 0.00001315245
## ... Similar to previous best
## Run 18 stress 0.09080181
## ... Procrustes: rmse 0.00001137739 max resid 0.00003451799
## ... Similar to previous best
## Run 19 stress 0.09080181
## ... Procrustes: rmse 0.000003433678 max resid 0.000009049462
## ... Similar to previous best
## Run 20 stress 0.09080181
## ... Procrustes: rmse 0.0000238389 max resid 0.00005808449
## ... Similar to previous best
## *** Best solution repeated 19 times
# Crear una función para generar el gráfico NMDS con stress
plot_nmds <- function(nmds_result, location_name) {
scores_df <- as.data.frame(scores(nmds_result)) %>%
rownames_to_column(var = "SampleID") %>%
left_join(meta, by = "SampleID") %>%
mutate(Location = location_name)
p <- ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Source)) +
geom_point(size = 4) +
labs(title = paste("Location:", location_name, "\nStress:", round(stress_values[location_name], 5)),
x = "NMDS1", y = "NMDS2", color = "Source") +
theme_bw() +
scale_color_jco() +
theme(legend.position = "right", legend.box = "vertical")
return(p)
}
# Aplicar la función a cada resultado NMDS y guardar los plots
nmds_plots <- lapply(names(nmds_list), function(loc) {
plot_nmds(nmds_list[[loc]], loc)
})
# Usar cowplot para combinar los gráficos
nmds_combined_plot <- plot_grid(plotlist = nmds_plots, ncol = 3, align = 'v')
# Mostrar el gráfico combinado
print(nmds_combined_plot)# Guardar la gráfica combinada si es necesario
#ggsave("combined_NMDS_plot.png", nmds_combined_plot, width = 12, height = ​``【oaicite:0】``​Permanova
meta_just = data.frame(aldex_clr_data, check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr) ## Joining with `by = join_by(`sample-id`)`
meta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
data=meta_just, method = "aitchison",
permutations = 999, strata = meta_just$Specimen)
permanovasPermanovas aitchison independientes
library(vegan)
library(dplyr)
library(tidyr)
# pseudocount
pseudocount <- 1
otu_table_fltr_pseudo <- otu_table_fltr + pseudocount
# Combine
meta_just <- metadata_with_hill_fltr %>%
dplyr::select(`sample-id`, Location, Source, Specimen) %>%
inner_join(as.data.frame(t(otu_table_fltr_pseudo)) %>% rownames_to_column(var = "sample-id"), by = "sample-id")
#
permanova_results <- list()
#
for (loc in unique(metadata_with_hill_fltr$Location)) {
meta_just_loc <- meta_just %>%
filter(Location == loc)
otu_table_loc <- otu_table_fltr_pseudo[, meta_just_loc$`sample-id`, drop = FALSE]
aitch_loc <- vegdist(t(otu_table_loc), method = "aitchison")
permanova_results[[loc]] <- adonis2(aitch_loc ~ Source,
data = meta_just_loc,
permutations = 999,
strata = meta_just_loc$Specimen)
}permanova_results## $`Monte obscuro Emiliano Zapata`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 1151
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 20333 0.25427 2.2163 0.001 ***
## Residual 13 59635 0.74573
## Total 15 79968 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $`El Castillo/Zentla`
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 15850 0.13282 1.7614 0.001 ***
## Residual 23 103480 0.86718
## Total 25 119330 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $Naolinco
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks: strata
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = aitch_loc ~ Source, data = meta_just_loc, permutations = 999, strata = meta_just_loc$Specimen)
## Df SumOfSqs R2 F Pr(>F)
## Source 2 20640 0.13271 2.0658 0.001 ***
## Residual 27 134882 0.86729
## Total 29 155522 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Prepare data
Explore
microbiome::summarize_phyloseq(full_ps_core_counts)## Compositional = NO2
## 1] Min. number of reads = 32892] Max. number of reads = 1320233] Total number of reads = 22961414] Average number of reads = 31890.84722222225] Median number of reads = 26767.57] Sparsity = 0.3817351598173526] Any OTU sum to 1 or less? NO8] Number of singletons = 09] Percent of OTUs that are singletons
## (i.e. exactly one read detected across all samples)010] Number of sample variables are: 12sampleSourceDateYearLocationWeekRaw.readsColect_numberSpecimenquant_readingEffective_readsASVs2
## [[1]]
## [1] "1] Min. number of reads = 3289"
##
## [[2]]
## [1] "2] Max. number of reads = 132023"
##
## [[3]]
## [1] "3] Total number of reads = 2296141"
##
## [[4]]
## [1] "4] Average number of reads = 31890.8472222222"
##
## [[5]]
## [1] "5] Median number of reads = 26767.5"
##
## [[6]]
## [1] "7] Sparsity = 0.381735159817352"
##
## [[7]]
## [1] "6] Any OTU sum to 1 or less? NO"
##
## [[8]]
## [1] "8] Number of singletons = 0"
##
## [[9]]
## [1] "9] Percent of OTUs that are singletons \n (i.e. exactly one read detected across all samples)0"
##
## [[10]]
## [1] "10] Number of sample variables are: 12"
##
## [[11]]
## [1] "sample" "Source" "Date" "Year"
## [5] "Location" "Week" "Raw.reads" "Colect_number"
## [9] "Specimen" "quant_reading" "Effective_reads" "ASVs"
#Get otu and metadata table of core phyloseq object
# extract otu table
otu_table_full_core <- otu_table(full_ps_core_counts@otu_table)
meta_full_core <- sample_data(full_ps_core_counts)
# Metadata
#meta_full_core <- data.frame(phyloseq::sample_data(pseq),check.names = FALSE)# NMDS aichitson
library(vegan)
pseudocount <- 1
otu_table_full_core_pseudo <- otu_table_full_core + pseudocount
aitch_fc=vegdist(t(otu_table_full_core_pseudo), method = "aitchison")
nmds_source_aitch_fc = vegan::metaMDS(aitch_fc, trymax = 20, k = 2) ## Run 0 stress 0.1321064
## Run 1 stress 0.1427431
## Run 2 stress 0.1794177
## Run 3 stress 0.171119
## Run 4 stress 0.1729933
## Run 5 stress 0.1426311
## Run 6 stress 0.132074
## ... New best solution
## ... Procrustes: rmse 0.004520532 max resid 0.03590215
## Run 7 stress 0.1681577
## Run 8 stress 0.1748434
## Run 9 stress 0.153449
## Run 10 stress 0.1320773
## ... Procrustes: rmse 0.001108172 max resid 0.007156763
## ... Similar to previous best
## Run 11 stress 0.1427431
## Run 12 stress 0.1321064
## ... Procrustes: rmse 0.004522692 max resid 0.03591593
## Run 13 stress 0.1683264
## Run 14 stress 0.1620791
## Run 15 stress 0.1321064
## ... Procrustes: rmse 0.004520646 max resid 0.03591293
## Run 16 stress 0.1519927
## Run 17 stress 0.1426311
## Run 18 stress 0.132074
## ... New best solution
## ... Procrustes: rmse 0.00000686812 max resid 0.00004317669
## ... Similar to previous best
## Run 19 stress 0.151375
## Run 20 stress 0.158456
## *** Best solution repeated 1 times
var_stress_nmds_aitch_fc <- round(nmds_source_aitch_fc$stress, 5)
var_stress_nmds_aitch_fc## [1] 0.13207
scores_source_aitch_fc= nmds_source_aitch_fc %>% vegan::scores()
meta_full_core$Source <- factor(meta_full_core$Source, levels = c("Soil associated", "Rhizhomorph", "Stipe"))
nmds_plot_aitch_fc<- ggplot() +
geom_point(data=data.frame(scores_source_aitch_fc) %>% #individuals
rownames_to_column(var = "sample")%>%
left_join(meta_full_core, by = "sample"),
aes(x=NMDS1, y=NMDS2, color = Source, shape = Location), size=4) +
# theme_linedraw()+
scale_fill_jco()+#color of points
scale_color_jco()+#color of points
theme(axis.text = element_text(colour = "black", size = 12),
axis.title = element_text(colour = "black", size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 12),
legend.position = "right",
legend.box = "vertical")+ theme_bw() +
ylab("NMDS2")+xlab("NMDS1")
nmds_plot_aitch_fc <- nmds_plot_aitch_fc +
annotate("text", x = Inf, y = -Inf, label = paste("Stress:", var_stress_nmds_aitch_fc),
hjust = 3.1, vjust = -1.9, size = 3)
nmds_plot_aitch_fcPermanova
meta_core_pseudo = data.frame(t(otu_table_full_core_pseudo), check.names = F) %>%
rownames_to_column(var = "sample") %>% inner_join(meta_full_core) ## Joining with `by = join_by(sample)`
perm_core = adonis2(t(otu_table_full_core_pseudo) ~ Source*Location,
data=meta_core_pseudo, method = "aitchison",
permutations = 999, strata = meta_core_pseudo$Specimen)
perm_coremeta_just = data.frame(t(otu_table_fltr_pseudo), check.names = F) %>%
rownames_to_column(var = "sample-id") %>% inner_join(metadata_with_hill_fltr)## Joining with `by = join_by(`sample-id`)`
permanovas = adonis2(t(otu_table_fltr_pseudo) ~ Source*Location,
data=meta_just, method = "aitchison",
permutations = 999, strata = meta_just$Specimen)
permanovas##res prim tutorial https://www.yanh.org/2021/01/01/microbiome-r/
library(ANCOMBC)## Registered S3 methods overwritten by 'treeio':
## method from
## MRCA.phylo tidytree
## MRCA.treedata tidytree
## Nnode.treedata tidytree
## Ntip.treedata tidytree
## ancestor.phylo tidytree
## ancestor.treedata tidytree
## child.phylo tidytree
## child.treedata tidytree
## full_join.phylo tidytree
## full_join.treedata tidytree
## groupClade.phylo tidytree
## groupClade.treedata tidytree
## groupOTU.phylo tidytree
## groupOTU.treedata tidytree
## is.rooted.treedata tidytree
## nodeid.phylo tidytree
## nodeid.treedata tidytree
## nodelab.phylo tidytree
## nodelab.treedata tidytree
## offspring.phylo tidytree
## offspring.treedata tidytree
## parent.phylo tidytree
## parent.treedata tidytree
## root.treedata tidytree
## rootnode.phylo tidytree
## sibling.phylo tidytree
####################################################################
#Run ANCOM-BC at the Family level and only including the prevalent genera
####################################################################
set.seed(123)
output_genus_full = ancombc2(data = tse,
assay_name = "counts",
tax_level = "Genus",
fix_formula = "Source+Location",
rand_formula = "(1|Specimen)",
p_adj_method = "holm",
pseudo_sens = TRUE,
prv_cut = 0.10,
lib_cut = 1000,
s0_perc = 0.05,
group = "Source",
struc_zero = TRUE,
neg_lb = TRUE,
alpha = 0.05,
n_cl = 2,
verbose = TRUE,
global = TRUE,
pairwise = TRUE,
dunnet = TRUE,
trend = TRUE,
iter_control = list(tol = 1e-2, max_iter = 20,
verbose = TRUE),
em_control = list(tol = 1e-5, max_iter = 100),
lme_control = lme4::lmerControl(),
mdfdr_control = list(fwer_ctrl_method = "holm", B = 100),
trend_control = list(contrast = list(matrix(c(1, 0, -1, 1),
nrow = 2,
byrow = TRUE),
matrix(c(-1, 0, 1, -1),
nrow = 2,
byrow = TRUE),
matrix(c(1, 0, 1, -1),
nrow = 2,
byrow = TRUE)),
node = list(2, 2, 1),
solver = "ECOS",
B = 10))## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Found more than one class "phylo" in cache; using the first, from namespace 'phyloseq'
## Also defined by 'tidytree'
## Obtaining initial estimates ...
## REML iteration = 1, epsilon = 2.3
## REML iteration = 2, epsilon = 0.61
## REML iteration = 3, epsilon = 0.36
## REML iteration = 4, epsilon = 0.21
## REML iteration = 5, epsilon = 0.13
## REML iteration = 6, epsilon = 0.079
## REML iteration = 7, epsilon = 0.049
## REML iteration = 8, epsilon = 0.031
## REML iteration = 9, epsilon = 0.019
## REML iteration = 10, epsilon = 0.012
## REML iteration = 11, epsilon = 0.0078
## Estimating sample-specific biases ...
## ANCOM-BC2 primary results ...
## The sensitivity analysis for the pseudo-count addition ...
## ANCOM-BC2 global test ...
## ANCOM-BC2 pairwise directional test ...
## ANCOM-BC2 Dunnet's type of test ...
## ANCOM-BC2 trend test ...
save.image("src/Diversity.RData")